mirror of
https://github.com/astral-sh/uv.git
synced 2025-07-23 05:05:02 +00:00
Split puffin-package
into requirements.txt parser and pypi-types
(#341)
There are only two things left in this crate and they don't really have anything to do with one another.
This commit is contained in:
parent
1f447892f3
commit
24e30e6557
60 changed files with 135 additions and 97 deletions
30
crates/pypi-types/Cargo.toml
Normal file
30
crates/pypi-types/Cargo.toml
Normal file
|
@ -0,0 +1,30 @@
|
|||
[package]
|
||||
name = "pypi-types"
|
||||
version = "0.0.1"
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
homepage = { workspace = true }
|
||||
documentation = { workspace = true }
|
||||
repository = { workspace = true }
|
||||
authors = { workspace = true }
|
||||
license = { workspace = true }
|
||||
|
||||
[dependencies]
|
||||
pep440_rs = { path = "../pep440-rs", features = ["serde"] }
|
||||
pep508_rs = { path = "../pep508-rs", features = ["serde"] }
|
||||
puffin-normalize = { path = "../puffin-normalize" }
|
||||
|
||||
mailparse = { workspace = true }
|
||||
once_cell = { workspace = true }
|
||||
regex = { workspace = true }
|
||||
rfc2047-decoder = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
indoc = { version = "2.0.4" }
|
||||
insta = { version = "1.34.0" }
|
||||
serde_json = { version = "1.0.108" }
|
||||
tempfile = { version = "3.8.1" }
|
||||
test-case = { version = "3.2.1" }
|
5
crates/pypi-types/src/lib.rs
Normal file
5
crates/pypi-types/src/lib.rs
Normal file
|
@ -0,0 +1,5 @@
|
|||
pub use metadata::{Error, Metadata21};
|
||||
pub use simple_json::{File, SimpleJson};
|
||||
|
||||
mod metadata;
|
||||
mod simple_json;
|
342
crates/pypi-types/src/metadata.rs
Normal file
342
crates/pypi-types/src/metadata.rs
Normal file
|
@ -0,0 +1,342 @@
|
|||
//! Derived from `pypi_types_crate`.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::io;
|
||||
use std::str::FromStr;
|
||||
|
||||
use mailparse::{MailHeaderMap, MailParseError};
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
use tracing::warn;
|
||||
|
||||
use pep440_rs::{Pep440Error, Version, VersionSpecifiers};
|
||||
use pep508_rs::{Pep508Error, Requirement};
|
||||
use puffin_normalize::{ExtraName, InvalidNameError, PackageName};
|
||||
|
||||
/// Python Package Metadata 2.1 as specified in
|
||||
/// <https://packaging.python.org/specifications/core-metadata/>
|
||||
///
|
||||
/// One addition is the requirements fixup which insert missing commas e.g. in
|
||||
/// `elasticsearch-dsl (>=7.2.0<8.0.0)`
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub struct Metadata21 {
|
||||
// Mandatory fields
|
||||
pub metadata_version: String,
|
||||
pub name: PackageName,
|
||||
pub version: Version,
|
||||
// Optional fields
|
||||
pub platforms: Vec<String>,
|
||||
pub supported_platforms: Vec<String>,
|
||||
pub summary: Option<String>,
|
||||
pub description: Option<String>,
|
||||
pub description_content_type: Option<String>,
|
||||
pub keywords: Option<String>,
|
||||
pub home_page: Option<String>,
|
||||
pub download_url: Option<String>,
|
||||
pub author: Option<String>,
|
||||
pub author_email: Option<String>,
|
||||
pub maintainer: Option<String>,
|
||||
pub maintainer_email: Option<String>,
|
||||
pub license: Option<String>,
|
||||
pub classifiers: Vec<String>,
|
||||
pub requires_dist: Vec<Requirement>,
|
||||
pub provides_dist: Vec<PackageName>,
|
||||
pub obsoletes_dist: Vec<String>,
|
||||
pub requires_python: Option<VersionSpecifiers>,
|
||||
pub requires_external: Vec<String>,
|
||||
pub project_urls: HashMap<String, String>,
|
||||
pub provides_extras: Vec<ExtraName>,
|
||||
}
|
||||
|
||||
/// <https://github.com/PyO3/python-pkginfo-rs/blob/d719988323a0cfea86d4737116d7917f30e819e2/src/error.rs>
|
||||
///
|
||||
/// The error type
|
||||
#[derive(Error, Debug)]
|
||||
pub enum Error {
|
||||
/// I/O error
|
||||
#[error(transparent)]
|
||||
Io(#[from] io::Error),
|
||||
/// mail parse error
|
||||
#[error(transparent)]
|
||||
MailParse(#[from] MailParseError),
|
||||
/// Metadata field not found
|
||||
#[error("metadata field {0} not found")]
|
||||
FieldNotFound(&'static str),
|
||||
/// Unknown distribution type
|
||||
#[error("unknown distribution type")]
|
||||
UnknownDistributionType,
|
||||
/// Metadata file not found
|
||||
#[error("metadata file not found")]
|
||||
MetadataNotFound,
|
||||
/// Invalid project URL (no comma)
|
||||
#[error("Invalid Project-URL field (missing comma): '{0}'")]
|
||||
InvalidProjectUrl(String),
|
||||
/// Multiple metadata files found
|
||||
#[error("found multiple metadata files: {0:?}")]
|
||||
MultipleMetadataFiles(Vec<String>),
|
||||
/// Invalid Version
|
||||
#[error("invalid version: {0}")]
|
||||
Pep440VersionError(String),
|
||||
/// Invalid VersionSpecifier
|
||||
#[error(transparent)]
|
||||
Pep440Error(#[from] Pep440Error),
|
||||
/// Invalid Requirement
|
||||
#[error(transparent)]
|
||||
Pep508Error(#[from] Pep508Error),
|
||||
#[error(transparent)]
|
||||
InvalidName(#[from] InvalidNameError),
|
||||
}
|
||||
|
||||
/// From <https://github.com/PyO3/python-pkginfo-rs/blob/d719988323a0cfea86d4737116d7917f30e819e2/src/metadata.rs#LL78C2-L91C26>
|
||||
impl Metadata21 {
|
||||
/// Parse distribution metadata from metadata bytes
|
||||
pub fn parse(content: &[u8]) -> Result<Self, Error> {
|
||||
// HACK: trick mailparse to parse as UTF-8 instead of ASCII
|
||||
let mut mail = b"Content-Type: text/plain; charset=utf-8\n".to_vec();
|
||||
mail.extend_from_slice(content);
|
||||
|
||||
let msg = mailparse::parse_mail(&mail)?;
|
||||
let headers = msg.get_headers();
|
||||
let get_first_value = |name| {
|
||||
headers.get_first_header(name).and_then(|header| {
|
||||
match rfc2047_decoder::decode(header.get_value_raw()) {
|
||||
Ok(value) => {
|
||||
if value == "UNKNOWN" {
|
||||
None
|
||||
} else {
|
||||
Some(value)
|
||||
}
|
||||
}
|
||||
Err(_) => None,
|
||||
}
|
||||
})
|
||||
};
|
||||
let get_all_values = |name| {
|
||||
let values: Vec<String> = headers
|
||||
.get_all_values(name)
|
||||
.into_iter()
|
||||
.filter(|value| value != "UNKNOWN")
|
||||
.collect();
|
||||
values
|
||||
};
|
||||
let metadata_version = headers
|
||||
.get_first_value("Metadata-Version")
|
||||
.ok_or(Error::FieldNotFound("Metadata-Version"))?;
|
||||
let name = PackageName::new(
|
||||
headers
|
||||
.get_first_value("Name")
|
||||
.ok_or(Error::FieldNotFound("Name"))?,
|
||||
)?;
|
||||
let version = Version::from_str(
|
||||
&headers
|
||||
.get_first_value("Version")
|
||||
.ok_or(Error::FieldNotFound("Version"))?,
|
||||
)
|
||||
.map_err(Error::Pep440VersionError)?;
|
||||
let platforms = get_all_values("Platform");
|
||||
let supported_platforms = get_all_values("Supported-Platform");
|
||||
let summary = get_first_value("Summary");
|
||||
let body = msg.get_body()?;
|
||||
let description = if body.trim().is_empty() {
|
||||
get_first_value("Description")
|
||||
} else {
|
||||
Some(body)
|
||||
};
|
||||
let keywords = get_first_value("Keywords");
|
||||
let home_page = get_first_value("Home-Page");
|
||||
let download_url = get_first_value("Download-URL");
|
||||
let author = get_first_value("Author");
|
||||
let author_email = get_first_value("Author-email");
|
||||
let license = get_first_value("License");
|
||||
let classifiers = get_all_values("Classifier");
|
||||
let requires_dist = get_all_values("Requires-Dist")
|
||||
.iter()
|
||||
.map(|requires_dist| LenientRequirement::from_str(requires_dist).map(Requirement::from))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
let provides_dist = get_all_values("Provides-Dist")
|
||||
.into_iter()
|
||||
.map(PackageName::new)
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
let obsoletes_dist = get_all_values("Obsoletes-Dist");
|
||||
let maintainer = get_first_value("Maintainer");
|
||||
let maintainer_email = get_first_value("Maintainer-email");
|
||||
let requires_python = get_first_value("Requires-Python")
|
||||
.map(|requires_python| {
|
||||
LenientVersionSpecifiers::from_str(&requires_python).map(VersionSpecifiers::from)
|
||||
})
|
||||
.transpose()?;
|
||||
let requires_external = get_all_values("Requires-External");
|
||||
let project_urls = get_all_values("Project-URL")
|
||||
.iter()
|
||||
.map(|name_value| match name_value.split_once(',') {
|
||||
None => Err(Error::InvalidProjectUrl(name_value.clone())),
|
||||
Some((name, value)) => Ok((name.to_string(), value.trim().to_string())),
|
||||
})
|
||||
.collect::<Result<_, _>>()?;
|
||||
let provides_extras = get_all_values("Provides-Extra")
|
||||
.into_iter()
|
||||
.map(ExtraName::new)
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
let description_content_type = get_first_value("Description-Content-Type");
|
||||
Ok(Metadata21 {
|
||||
metadata_version,
|
||||
name,
|
||||
version,
|
||||
platforms,
|
||||
supported_platforms,
|
||||
summary,
|
||||
description,
|
||||
description_content_type,
|
||||
keywords,
|
||||
home_page,
|
||||
download_url,
|
||||
author,
|
||||
author_email,
|
||||
maintainer,
|
||||
maintainer_email,
|
||||
license,
|
||||
classifiers,
|
||||
requires_dist,
|
||||
provides_dist,
|
||||
obsoletes_dist,
|
||||
requires_python,
|
||||
requires_external,
|
||||
project_urls,
|
||||
provides_extras,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
static MISSING_COMMA: Lazy<Regex> = Lazy::new(|| Regex::new(r"(\d)([<>=~^!])").unwrap());
|
||||
|
||||
static NOT_EQUAL_TILDE: Lazy<Regex> = Lazy::new(|| Regex::new(r"!=~((?:\d\.)*\d)").unwrap());
|
||||
|
||||
/// Like [`Requirement`], but attempts to correct some common errors in user-provided requirements.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
|
||||
struct LenientRequirement(Requirement);
|
||||
|
||||
impl FromStr for LenientRequirement {
|
||||
type Err = Pep508Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match Requirement::from_str(s) {
|
||||
Ok(requirement) => Ok(Self(requirement)),
|
||||
Err(err) => {
|
||||
// Given `elasticsearch-dsl (>=7.2.0<8.0.0)`, rewrite to `elasticsearch-dsl (>=7.2.0,<8.0.0)`.
|
||||
let patched = MISSING_COMMA.replace(s, r"$1,$2");
|
||||
if patched != s {
|
||||
if let Ok(requirement) = Requirement::from_str(&patched) {
|
||||
warn!(
|
||||
"Inserting missing comma into invalid requirement (before: `{s}`; after: `{patched}`)",
|
||||
);
|
||||
return Ok(Self(requirement));
|
||||
}
|
||||
}
|
||||
|
||||
// Given `jupyter-core (!=~5.0,>=4.12)`, rewrite to `jupyter-core (!=5.0.*,>=4.12)`.
|
||||
let patched = NOT_EQUAL_TILDE.replace(s, r"!=${1}.*");
|
||||
if patched != s {
|
||||
if let Ok(requirement) = Requirement::from_str(&patched) {
|
||||
warn!(
|
||||
"Adding wildcard after invalid tilde operator (before: `{s}`; after: `{patched}`)",
|
||||
);
|
||||
return Ok(Self(requirement));
|
||||
}
|
||||
}
|
||||
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LenientRequirement> for Requirement {
|
||||
fn from(requirement: LenientRequirement) -> Self {
|
||||
requirement.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Like [`VersionSpecifiers`], but attempts to correct some common errors in user-provided requirements.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
|
||||
struct LenientVersionSpecifiers(VersionSpecifiers);
|
||||
|
||||
impl FromStr for LenientVersionSpecifiers {
|
||||
type Err = Pep440Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match VersionSpecifiers::from_str(s) {
|
||||
Ok(specifiers) => Ok(Self(specifiers)),
|
||||
Err(err) => {
|
||||
// Given `>=3.5.*`, rewrite to `>=3.5`.
|
||||
let patched = match s {
|
||||
">=3.12.*" => Some(">=3.12"),
|
||||
">=3.11.*" => Some(">=3.11"),
|
||||
">=3.10.*" => Some(">=3.10"),
|
||||
">=3.9.*" => Some(">=3.9"),
|
||||
">=3.8.*" => Some(">=3.8"),
|
||||
">=3.7.*" => Some(">=3.7"),
|
||||
">=3.6.*" => Some(">=3.6"),
|
||||
">=3.5.*" => Some(">=3.5"),
|
||||
">=3.4.*" => Some(">=3.4"),
|
||||
">=3.3.*" => Some(">=3.3"),
|
||||
">=3.2.*" => Some(">=3.2"),
|
||||
">=3.1.*" => Some(">=3.1"),
|
||||
">=3.0.*" => Some(">=3.0"),
|
||||
_ => None,
|
||||
};
|
||||
if let Some(patched) = patched {
|
||||
if let Ok(specifier) = VersionSpecifiers::from_str(patched) {
|
||||
warn!(
|
||||
"Correcting invalid wildcard bound on version specifier (before: `{s}`; after: `{patched}`)",
|
||||
);
|
||||
return Ok(Self(specifier));
|
||||
}
|
||||
}
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LenientVersionSpecifiers> for VersionSpecifiers {
|
||||
fn from(specifiers: LenientVersionSpecifiers) -> Self {
|
||||
specifiers.0
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use pep440_rs::VersionSpecifiers;
    use pep508_rs::Requirement;

    use super::{LenientRequirement, LenientVersionSpecifiers};

    /// A comma missing between two specifiers is inserted.
    #[test]
    fn missing_comma() {
        let actual: Requirement = LenientRequirement::from_str("elasticsearch-dsl (>=7.2.0<8.0.0)")
            .unwrap()
            .into();
        let expected: Requirement =
            Requirement::from_str("elasticsearch-dsl (>=7.2.0,<8.0.0)").unwrap();
        assert_eq!(actual, expected);
    }

    /// The invalid `!=~X` operator is rewritten to `!=X.*`.
    #[test]
    fn not_equal_tilde() {
        let actual: Requirement = LenientRequirement::from_str("jupyter-core (!=~5.0,>=4.12)")
            .unwrap()
            .into();
        let expected: Requirement = Requirement::from_str("jupyter-core (!=5.0.*,>=4.12)").unwrap();
        assert_eq!(actual, expected);

        let actual: Requirement = LenientRequirement::from_str("jupyter-core (!=~5,>=4.12)")
            .unwrap()
            .into();
        let expected: Requirement = Requirement::from_str("jupyter-core (!=5.*,>=4.12)").unwrap();
        assert_eq!(actual, expected);
    }

    /// A wildcard on a `>=` bound is dropped.
    #[test]
    fn greater_than_or_equal_wildcard() {
        let actual: VersionSpecifiers = LenientVersionSpecifiers::from_str(">=3.5.*")
            .unwrap()
            .into();
        let expected = VersionSpecifiers::from_str(">=3.5").unwrap();
        assert_eq!(actual, expected);
    }
}
|
59
crates/pypi-types/src/simple_json.rs
Normal file
59
crates/pypi-types/src/simple_json.rs
Normal file
|
@ -0,0 +1,59 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SimpleJson {
|
||||
pub files: Vec<File>,
|
||||
pub meta: Meta,
|
||||
pub name: String,
|
||||
pub versions: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub struct File {
|
||||
pub core_metadata: Metadata,
|
||||
pub data_dist_info_metadata: Metadata,
|
||||
pub filename: String,
|
||||
pub hashes: Hashes,
|
||||
pub requires_python: Option<String>,
|
||||
pub size: usize,
|
||||
pub upload_time: String,
|
||||
pub url: String,
|
||||
pub yanked: Yanked,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
pub enum Metadata {
|
||||
Bool(bool),
|
||||
Hashes(Hashes),
|
||||
}
|
||||
|
||||
impl Metadata {
|
||||
pub fn is_available(&self) -> bool {
|
||||
match self {
|
||||
Self::Bool(is_available) => *is_available,
|
||||
Self::Hashes(_) => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
pub enum Yanked {
|
||||
Bool(bool),
|
||||
Reason(String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Hashes {
|
||||
pub sha256: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub struct Meta {
|
||||
#[serde(rename = "_last-serial")]
|
||||
pub last_serial: i64,
|
||||
pub api_version: String,
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue