Add support for HTML indexes (#719)

## Summary This PR adds support for HTML index responses (as with `--index-url=https://download.pytorch.org/whl`). Closes https://github.com/astral-sh/puffin/issues/412.
2025-08-03 10:33:49 +00:00 · 2023-12-24 11:04:00 -05:00 · 2023-12-24 11:04:00 -05:00 · 5bce699ee1
commit 5bce699ee1
parent 9e6cb706a0
11 changed files with 465 additions and 19 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -2373,6 +2373,7 @@ dependencies = [
 "futures",
 "http",
 "http-cache-semantics",
 "insta",
 "install-wheel-rs",
 "pep440_rs 0.3.12",
 "pep508_rs",
@ -2389,6 +2390,7 @@ dependencies = [
 "sha2",
 "tempfile",
 "thiserror",
 "tl",
 "tokio",
 "tokio-util",
 "tracing",
@ -3701,6 +3703,12 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 [[package]]
 name = "tl"
 version = "0.7.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d5e993a1c7c32fdf90a308cec4d457f507b2573acc909bd6e7a092321664fdb3"
 [[package]]
 name = "tokio"
 version = "1.33.0"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -75,6 +75,7 @@ target-lexicon = { version = "0.12.12" }
 tempfile = { version = "3.8.1" }
 textwrap = { version = "0.15.2" }
 thiserror = { version = "1.0.50" }
 tl = { version = "0.7.7" }
 tokio = { version = "1.33.0", features = ["rt-multi-thread"] }
 tokio-util = { version = "0.7.10", features = ["compat"] }
 toml = { version = "0.8.6" }
--- a/crates/distribution-filename/src/source_dist.rs
+++ b/crates/distribution-filename/src/source_dist.rs
@ -8,7 +8,8 @@ use thiserror::Error;
 use pep440_rs::Version;
 use puffin_normalize::{InvalidNameError, PackageName};
-#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
+#[derive(Clone, Debug, PartialEq, Eq)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum SourceDistExtension {
    Zip,
    TarGz,
--- a/crates/puffin-cli/tests/pip_compile.rs
+++ b/crates/puffin-cli/tests/pip_compile.rs
@ -2701,3 +2701,42 @@ fn cache_errors_are_non_fatal() -> Result<()> {
        Ok(())
    }
 }
 /// Resolve a distribution from an HTML-only registry.
 #[test]
 fn compile_html() -> Result<()> {
    let temp_dir = TempDir::new()?;
    let cache_dir = TempDir::new()?;
    let venv = create_venv_py312(&temp_dir, &cache_dir);
    let requirements_in = temp_dir.child("requirements.in");
    requirements_in.write_str("jinja2<=3.1.2")?;
    insta::with_settings!({
        filters => INSTA_FILTERS.to_vec()
    }, {
        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
            .arg("pip-compile")
            .arg("requirements.in")
            .arg("--cache-dir")
            .arg(cache_dir.path())
            .arg("--index-url")
            .arg("https://download.pytorch.org/whl")
            .env("VIRTUAL_ENV", venv.as_os_str())
            .current_dir(&temp_dir), @r###"
        success: true
        exit_code: 0
        ----- stdout -----
        # This file was autogenerated by Puffin v0.0.1 via the following command:
        #    puffin pip-compile requirements.in --cache-dir [CACHE_DIR]
        jinja2==3.1.2
        markupsafe==2.1.3
            # via jinja2
        ----- stderr -----
        Resolved 2 packages in [TIME]
        "###);
    });
    Ok(())
 }
--- a/crates/puffin-client/Cargo.toml
+++ b/crates/puffin-client/Cargo.toml
@ -4,7 +4,7 @@ version = "0.0.1"
 edition = "2021"
 [dependencies]
-distribution-filename = { path = "../distribution-filename" }
+distribution-filename = { path = "../distribution-filename", features = ["serde"] }
 distribution-types = { path = "../distribution-types" }
 install-wheel-rs = { path = "../install-wheel-rs" }
 pep440_rs = { path = "../pep440-rs" }
@ -28,6 +28,7 @@ serde_json = { workspace = true }
 sha2 = { workspace = true }
 tempfile = { workspace = true }
 thiserror = { workspace = true }
 tl = { workspace = true }
 tokio = { workspace = true, features = ["fs"] }
 tokio-util = { workspace = true }
 tracing = { workspace = true }
@ -37,4 +38,5 @@ url = { workspace = true }
 pep508_rs = { path = "../pep508-rs" }
 anyhow = { workspace = true }
 insta = { version = "1.34.0" }
 tokio = { workspace = true, features = ["fs", "macros"] }
--- a/crates/puffin-client/src/error.rs
+++ b/crates/puffin-client/src/error.rs
@ -5,6 +5,7 @@ use async_zip::error::ZipError;
 use thiserror::Error;
 use url::Url;
 use crate::html;
 use distribution_filename::{WheelFilename, WheelFilenameError};
 use puffin_normalize::PackageName;
@ -49,6 +50,9 @@ pub enum Error {
    #[error("Received some unexpected JSON from {url}")]
    BadJson { source: serde_json::Error, url: Url },
    #[error("Received some unexpected HTML from {url}")]
    BadHtml { source: html::Error, url: Url },
    #[error(transparent)]
    AsyncHttpRangeReader(#[from] AsyncHttpRangeReaderError),
@ -82,10 +86,23 @@ pub enum Error {
    /// An [`io::Error`] with a filename attached
    #[error(transparent)]
    Persist(#[from] tempfile::PersistError),
    #[error("Missing `Content-Type` header for {0}")]
    MissingContentType(Url),
    #[error("Invalid `Content-Type` header for {0}")]
    InvalidContentTypeHeader(Url, #[source] http::header::ToStrError),
    #[error("Unsupported `Content-Type` \"{1}\" for {0}")]
    UnsupportedMediaType(Url, String),
 }
 impl Error {
-    pub fn from_json_err(err: serde_json::Error, url: Url) -> Self {
+    pub(crate) fn from_json_err(err: serde_json::Error, url: Url) -> Self {
        Self::BadJson { source: err, url }
    }
    pub(crate) fn from_html_err(err: html::Error, url: Url) -> Self {
        Self::BadHtml { source: err, url }
    }
 }
--- a/crates/puffin-client/src/html.rs
+++ b/crates/puffin-client/src/html.rs
@ -0,0 +1,332 @@
 use std::str::FromStr;
 use tl::HTMLTag;
 use url::Url;
 use pep440_rs::VersionSpecifiers;
 use pypi_types::{DistInfoMetadata, File, Hashes, Yanked};
 #[derive(Debug, thiserror::Error)]
 pub enum Error {
    #[error(transparent)]
    Utf8(#[from] std::str::Utf8Error),
    #[error(transparent)]
    UrlParse(#[from] url::ParseError),
    #[error("Missing href attribute on URL: {0}")]
    MissingHref(Url),
    #[error("Expected distribution filename as last path component of URL: {0}")]
    MissingFilename(Url),
    #[error("Missing hash attribute on URL: {0}")]
    MissingHash(Url),
    #[error("Unexpected fragment (expected `#sha256=...`) on URL: {0}")]
    FragmentParse(Url),
    #[error("Unsupported hash algorithm (expected `sha256`) on: {0}")]
    UnsupportedHashAlgorithm(Url),
    #[error("Invalid `requires-python` specifier: {0}")]
    Pep440(#[source] pep440_rs::Pep440Error),
 }
 /// Parse the list of [`File`]s from the simple HTML page returned by the given URL.
 pub(crate) fn parse_simple(text: &str, base: &Url) -> Result<Vec<File>, Error> {
    let dom = tl::parse(text, tl::ParserOptions::default()).unwrap();
    // Parse the first `<base>` tag, if any, to determine the base URL to which all
    // relative URLs should be resolved. The HTML spec requires that the `<base>` tag
    // appear before other tags with attribute values of URLs.
    let base = dom
        .nodes()
        .iter()
        .filter_map(|node| node.as_tag())
        .take_while(|tag| !matches!(tag.name().as_bytes(), b"a" | b"link"))
        .find(|tag| tag.name().as_bytes() == b"base")
        .map(|base| parse_base(base))
        .transpose()?
        .flatten()
        .unwrap_or_else(|| base.clone());
    // Parse each `<a>` tag, to extract the filename, hash, and URL.
    let files: Vec<File> = dom
        .nodes()
        .iter()
        .filter_map(|node| node.as_tag())
        .filter(|link| link.name().as_bytes() == b"a")
        .map(|link| parse_anchor(link, &base))
        .collect::<Result<Vec<_>, _>>()?;
    Ok(files)
 }
 /// Parse the `href` from a `<base>` tag.
 fn parse_base(base: &HTMLTag) -> Result<Option<Url>, Error> {
    let Some(Some(href)) = base.attributes().get("href") else {
        return Ok(None);
    };
    let href = std::str::from_utf8(href.as_bytes())?;
    let url = Url::parse(href)?;
    Ok(Some(url))
 }
 /// Parse the hash from a fragment, as in: `sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61`
 fn parse_hash(fragment: &str, url: &Url) -> Result<Hashes, Error> {
    let mut parts = fragment.split('=');
    // Extract the key and value.
    let name = parts
        .next()
        .ok_or_else(|| Error::FragmentParse(url.clone()))?;
    let value = parts
        .next()
        .ok_or_else(|| Error::FragmentParse(url.clone()))?;
    // Ensure there are no more parts.
    if parts.next().is_some() {
        return Err(Error::FragmentParse(url.clone()));
    }
    // TODO(charlie): Support all hash algorithms.
    if name != "sha256" {
        return Err(Error::UnsupportedHashAlgorithm(url.clone()));
    }
    let sha256 = std::str::from_utf8(value.as_bytes())?;
    let sha256 = sha256.to_string();
    Ok(Hashes { sha256 })
 }
 /// Parse a [`File`] from an `<a>` tag.
 fn parse_anchor(link: &HTMLTag, base: &Url) -> Result<File, Error> {
    // Extract the href.
    let href = link
        .attributes()
        .get("href")
        .flatten()
        .ok_or_else(|| Error::MissingHref(base.clone()))?;
    let href = std::str::from_utf8(href.as_bytes())?;
    let url = base.join(href)?;
    // Extract the filename from the body text, which MUST match that of
    // the final path component of the URL.
    let filename = url
        .path_segments()
        .and_then(|segments| segments.last())
        .ok_or_else(|| Error::MissingFilename(url.clone()))?;
    // Extract the hash, which should be in the fragment.
    let hashes = url
        .fragment()
        .map(|fragment| parse_hash(fragment, &url))
        .transpose()?
        .ok_or_else(|| Error::MissingHash(url.clone()))?;
    // Extract the `requires-python` field, which should be set on the
    // `data-requires-python` attribute.
    let requires_python =
        if let Some(requires_python) = link.attributes().get("data-requires-python").flatten() {
            let requires_python = std::str::from_utf8(requires_python.as_bytes())?;
            let requires_python =
                VersionSpecifiers::from_str(requires_python).map_err(Error::Pep440)?;
            Some(requires_python)
        } else {
            None
        };
    // Extract the `data-dist-info-metadata` field, which should be set on
    // the `data-dist-info-metadata` attribute.
    let dist_info_metadata = if let Some(dist_info_metadata) =
        link.attributes().get("data-dist-info-metadata").flatten()
    {
        let dist_info_metadata = std::str::from_utf8(dist_info_metadata.as_bytes())?;
        match dist_info_metadata {
            "true" => Some(DistInfoMetadata::Bool(true)),
            "false" => Some(DistInfoMetadata::Bool(false)),
            fragment => Some(DistInfoMetadata::Hashes(parse_hash(fragment, &url)?)),
        }
    } else {
        None
    };
    // Extract the `yanked` field, which should be set on the `data-yanked`
    // attribute.
    let yanked = if let Some(yanked) = link.attributes().get("data-yanked").flatten() {
        let yanked = std::str::from_utf8(yanked.as_bytes())?;
        Some(Yanked::Reason(yanked.to_string()))
    } else {
        None
    };
    Ok(File {
        dist_info_metadata,
        yanked,
        requires_python,
        hashes,
        filename: filename.to_string(),
        // TODO(charlie): Store serialized URLs.
        url: url.to_string(),
        size: None,
        upload_time: None,
    })
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    #[test]
    fn parse_file() {
        let text = r#"
 <!DOCTYPE html>
 <html>
  <body>
    <h1>Links for jinja2</h1>
    <a href="/whl/Jinja2-3.1.2-py3-none-any.whl#sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61">Jinja2-3.1.2-py3-none-any.whl</a><br/>
  </body>
 </html>
 <!--TIMESTAMP 1703347410-->
        "#;
        let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
        let result = parse_simple(text, &base).unwrap();
        insta::assert_debug_snapshot!(result, @r###"
        [
            File {
                dist_info_metadata: None,
                filename: "Jinja2-3.1.2-py3-none-any.whl",
                hashes: Hashes {
                    sha256: "6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
                },
                requires_python: None,
                size: None,
                upload_time: None,
                url: "https://download.pytorch.org/whl/Jinja2-3.1.2-py3-none-any.whl#sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
                yanked: None,
            },
        ]
        "###);
    }
    #[test]
    fn parse_base() {
        let text = r#"
 <!DOCTYPE html>
 <html>
  <head>
    <base href="https://index.python.org/">
  </head>
  <body>
    <h1>Links for jinja2</h1>
    <a href="/whl/Jinja2-3.1.2-py3-none-any.whl#sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61">Jinja2-3.1.2-py3-none-any.whl</a><br/>
  </body>
 </html>
 <!--TIMESTAMP 1703347410-->
        "#;
        let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
        let result = parse_simple(text, &base).unwrap();
        insta::assert_debug_snapshot!(result, @r###"
        [
            File {
                dist_info_metadata: None,
                filename: "Jinja2-3.1.2-py3-none-any.whl",
                hashes: Hashes {
                    sha256: "6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
                },
                requires_python: None,
                size: None,
                upload_time: None,
                url: "https://index.python.org/whl/Jinja2-3.1.2-py3-none-any.whl#sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
                yanked: None,
            },
        ]
        "###);
    }
    #[test]
    fn parse_missing_href() {
        let text = r#"
 <!DOCTYPE html>
 <html>
  <body>
    <h1>Links for jinja2</h1>
    <a>Jinja2-3.1.2-py3-none-any.whl</a><br/>
  </body>
 </html>
 <!--TIMESTAMP 1703347410-->
        "#;
        let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
        let result = parse_simple(text, &base).unwrap_err();
        insta::assert_display_snapshot!(result, @"Missing href attribute on URL: https://download.pytorch.org/whl/jinja2/");
    }
    #[test]
    fn parse_empty_href() {
        let text = r#"
 <!DOCTYPE html>
 <html>
  <body>
    <h1>Links for jinja2</h1>
    <a href="">Jinja2-3.1.2-py3-none-any.whl</a><br/>
  </body>
 </html>
 <!--TIMESTAMP 1703347410-->
        "#;
        let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
        let result = parse_simple(text, &base).unwrap_err();
        insta::assert_display_snapshot!(result, @"Missing hash attribute on URL: https://download.pytorch.org/whl/jinja2/");
    }
    #[test]
    fn parse_missing_hash() {
        let text = r#"
 <!DOCTYPE html>
 <html>
  <body>
    <h1>Links for jinja2</h1>
    <a href="/whl/Jinja2-3.1.2-py3-none-any.whl">Jinja2-3.1.2-py3-none-any.whl</a><br/>
  </body>
 </html>
 <!--TIMESTAMP 1703347410-->
        "#;
        let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
        let result = parse_simple(text, &base).unwrap_err();
        insta::assert_display_snapshot!(result, @"Missing hash attribute on URL: https://download.pytorch.org/whl/Jinja2-3.1.2-py3-none-any.whl");
    }
    #[test]
    fn parse_missing_hash_value() {
        let text = r#"
 <!DOCTYPE html>
 <html>
  <body>
    <h1>Links for jinja2</h1>
    <a href="/whl/Jinja2-3.1.2-py3-none-any.whl#sha256">Jinja2-3.1.2-py3-none-any.whl</a><br/>
  </body>
 </html>
 <!--TIMESTAMP 1703347410-->
        "#;
        let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
        let result = parse_simple(text, &base).unwrap_err();
        insta::assert_display_snapshot!(result, @"Unexpected fragment (expected `#sha256=...`) on URL: https://download.pytorch.org/whl/Jinja2-3.1.2-py3-none-any.whl#sha256");
    }
    #[test]
    fn parse_unknown_hash() {
        let text = r#"
 <!DOCTYPE html>
 <html>
  <body>
    <h1>Links for jinja2</h1>
    <a href="/whl/Jinja2-3.1.2-py3-none-any.whl#sha512=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61">Jinja2-3.1.2-py3-none-any.whl</a><br/>
  </body>
 </html>
 <!--TIMESTAMP 1703347410-->
        "#;
        let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
        let result = parse_simple(text, &base).unwrap_err();
        insta::assert_display_snapshot!(result, @"Unsupported hash algorithm (expected `sha256`) on: https://download.pytorch.org/whl/Jinja2-3.1.2-py3-none-any.whl#sha512=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61");
    }
 }
--- a/crates/puffin-client/src/lib.rs
+++ b/crates/puffin-client/src/lib.rs
@ -6,5 +6,6 @@ pub use registry_client::{
 mod cached_client;
 mod error;
 mod html;
 mod registry_client;
 mod remote_metadata;
--- a/crates/puffin-client/src/registry_client.rs
+++ b/crates/puffin-client/src/registry_client.rs
@ -25,7 +25,7 @@ use puffin_normalize::PackageName;
 use pypi_types::{File, IndexUrl, IndexUrls, Metadata21, SimpleJson};
 use crate::remote_metadata::wheel_metadata_from_remote_zip;
-use crate::{CachedClient, CachedClientError, Error};
+use crate::{html, CachedClient, CachedClientError, Error};
 /// A builder for an [`RegistryClient`].
 #[derive(Debug, Clone)]
@ -135,7 +135,6 @@ impl RegistryClient {
            let mut url: Url = index.clone().into();
            url.path_segments_mut().unwrap().push(package_name.as_ref());
            url.path_segments_mut().unwrap().push("");
            url.set_query(Some("format=application/vnd.pypi.simple.v1+json"));
            trace!("Fetching metadata for {} from {}", package_name, url);
@ -153,14 +152,37 @@ impl RegistryClient {
                .uncached()
                .get(url.clone())
                .header("Accept-Encoding", "gzip")
-                .header("Accept", "application/vnd.pypi.simple.v1+json")
+                .header("Accept", MediaType::accepts())
                .build()?;
            let parse_simple_response = |response: Response| async {
-                let bytes = response.bytes().await?;
+                let content_type = response
-                let data: SimpleJson = serde_json::from_slice(bytes.as_ref())
+                    .headers()
-                    .map_err(|err| Error::from_json_err(err, url))?;
+                    .get("content-type")
-                let metadata = SimpleMetadata::from_package_simple_json(package_name, data);
+                    .ok_or_else(|| Error::MissingContentType(url.clone()))?;
-                Ok(metadata)
+                let content_type = content_type
                    .to_str()
                    .map_err(|err| Error::InvalidContentTypeHeader(url.clone(), err))?;
                let media_type = content_type.split(';').next().unwrap_or(content_type);
                let media_type = MediaType::from_str(media_type).ok_or_else(|| {
                    Error::UnsupportedMediaType(url.clone(), media_type.to_string())
                })?;
                match media_type {
                    MediaType::Json => {
                        let bytes = response.bytes().await?;
                        let data: SimpleJson = serde_json::from_slice(bytes.as_ref())
                            .map_err(|err| Error::from_json_err(err, url.clone()))?;
                        let metadata = SimpleMetadata::from_files(package_name, data.files);
                        Ok(metadata)
                    }
                    MediaType::Html => {
                        let text = response.text().await?;
                        let files = html::parse_simple(&text, &url)
                            .map_err(|err| Error::from_html_err(err, url.clone()))?;
                        let metadata = SimpleMetadata::from_files(package_name, files);
                        Ok(metadata)
                    }
                }
            };
            let result = self
                .client
@ -189,7 +211,7 @@ impl RegistryClient {
    /// Fetch the metadata for a remote wheel file.
    ///
-    /// For a remote wheel, we try the following ways to fetch the metadata:  
+    /// For a remote wheel, we try the following ways to fetch the metadata:
    /// 1. From a [PEP 658](https://peps.python.org/pep-0658/) data-dist-info-metadata url
    /// 2. From a remote wheel by partial zip reading
    /// 3. From a (temp) download of a remote wheel (this is a fallback, the webserver should support range requests)
@ -239,7 +261,7 @@ impl RegistryClient {
        if file
            .dist_info_metadata
            .as_ref()
-            .is_some_and(pypi_types::Metadata::is_available)
+            .is_some_and(pypi_types::DistInfoMetadata::is_available)
        {
            let url = Url::parse(&format!("{}.metadata", file.url))?;
@ -423,11 +445,11 @@ impl SimpleMetadata {
        self.0.iter()
    }
-    fn from_package_simple_json(package_name: &PackageName, simple_json: SimpleJson) -> Self {
+    fn from_files(package_name: &PackageName, files: Vec<File>) -> Self {
        let mut metadata = Self::default();
        // Group the distributions by version and kind
-        for file in simple_json.files {
+        for file in files {
            if let Some(filename) =
                DistFilename::try_from_filename(file.filename.as_str(), package_name)
            {
@ -461,3 +483,26 @@ impl IntoIterator for SimpleMetadata {
        self.0.into_iter()
    }
 }
 #[derive(Debug)]
 enum MediaType {
    Json,
    Html,
 }
 impl MediaType {
    /// Parse a media type from a string, returning `None` if the media type is not supported.
    fn from_str(s: &str) -> Option<Self> {
        match s {
            "application/vnd.pypi.simple.v1+json" => Some(Self::Json),
            "application/vnd.pypi.simple.v1+html" | "text/html" => Some(Self::Html),
            _ => None,
        }
    }
    /// Return the `Accept` header value for all supported media types.
    #[inline]
    const fn accepts() -> &'static str {
        "application/vnd.pypi.simple.v1+json, application/vnd.pypi.simple.v1+html;q=0.2, text/html"
    }
 }
--- a/crates/pypi-types/src/lib.rs
+++ b/crates/pypi-types/src/lib.rs
@ -2,7 +2,7 @@ pub use direct_url::{ArchiveInfo, DirInfo, DirectUrl, VcsInfo, VcsKind};
 pub use index_url::{IndexUrl, IndexUrls};
 pub use lenient_requirement::LenientVersionSpecifiers;
 pub use metadata::{Error, Metadata21};
-pub use simple_json::{File, Metadata, SimpleJson, Yanked};
+pub use simple_json::{DistInfoMetadata, File, Hashes, SimpleJson, Yanked};
 mod direct_url;
 mod index_url;
--- a/crates/pypi-types/src/simple_json.rs
+++ b/crates/pypi-types/src/simple_json.rs
@ -18,7 +18,7 @@ pub struct SimpleJson {
 pub struct File {
    // Not PEP 691 compliant alias used by pypi
    #[serde(alias = "data_dist_info_metadata")]
-    pub dist_info_metadata: Option<Metadata>,
+    pub dist_info_metadata: Option<DistInfoMetadata>,
    pub filename: String,
    pub hashes: Hashes,
    /// Note: Deserialized with [`LenientVersionSpecifiers`] since there are a number of invalid
@ -47,12 +47,12 @@ where
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[serde(untagged)]
-pub enum Metadata {
+pub enum DistInfoMetadata {
    Bool(bool),
    Hashes(Hashes),
 }
-impl Metadata {
+impl DistInfoMetadata {
    pub fn is_available(&self) -> bool {
        match self {
            Self::Bool(is_available) => *is_available,