Add support for lenient parsing (#115)

This PR enables us to make "fixups" to bad metadata. I copied over the
one fixup that @konstin made in `monotrail-resolve`, and added a few
common ones for `Requires-Python`.
This commit is contained in:
Charlie Marsh 2023-10-17 22:03:16 -04:00 committed by GitHub
parent 0d90256151
commit 89db5d79bc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 113 additions and 3 deletions

View file

@ -5,8 +5,11 @@ use std::io;
use std::str::FromStr; use std::str::FromStr;
use mailparse::{MailHeaderMap, MailParseError}; use mailparse::{MailHeaderMap, MailParseError};
use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use thiserror::Error; use thiserror::Error;
use tracing::warn;
use pep440_rs::{Pep440Error, Version, VersionSpecifiers}; use pep440_rs::{Pep440Error, Version, VersionSpecifiers};
use pep508_rs::{Pep508Error, Requirement}; use pep508_rs::{Pep508Error, Requirement};
@ -146,14 +149,16 @@ impl Metadata21 {
let classifiers = get_all_values("Classifier"); let classifiers = get_all_values("Classifier");
let requires_dist = get_all_values("Requires-Dist") let requires_dist = get_all_values("Requires-Dist")
.iter() .iter()
.map(|requires_dist| Requirement::from_str(requires_dist)) .map(|requires_dist| LenientRequirement::from_str(requires_dist).map(Requirement::from))
.collect::<Result<Vec<_>, _>>()?; .collect::<Result<Vec<_>, _>>()?;
let provides_dist = get_all_values("Provides-Dist"); let provides_dist = get_all_values("Provides-Dist");
let obsoletes_dist = get_all_values("Obsoletes-Dist"); let obsoletes_dist = get_all_values("Obsoletes-Dist");
let maintainer = get_first_value("Maintainer"); let maintainer = get_first_value("Maintainer");
let maintainer_email = get_first_value("Maintainer-email"); let maintainer_email = get_first_value("Maintainer-email");
let requires_python = get_first_value("Requires-Python") let requires_python = get_first_value("Requires-Python")
.map(|requires_python| VersionSpecifiers::from_str(&requires_python)) .map(|requires_python| {
LenientVersionSpecifiers::from_str(&requires_python).map(VersionSpecifiers::from)
})
.transpose()?; .transpose()?;
let requires_external = get_all_values("Requires-External"); let requires_external = get_all_values("Requires-External");
let project_urls = get_all_values("Project-URL") let project_urls = get_all_values("Project-URL")
@ -193,3 +198,86 @@ impl Metadata21 {
}) })
} }
} }
/// Matches a digit immediately followed by one of the characters `<`, `>`, `=`, `~`, `^` or `!`.
///
/// The digit and the following character are captured as groups 1 and 2 respectively, so a
/// replacement can insert text between them.
static REQUIREMENT_FIXUP_REGEX: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"(\d)([<>=~^!])";
    Regex::new(pattern).unwrap()
});
/// A thin wrapper around [`Requirement`] that is parsed leniently: when the input is not a valid
/// requirement, a best-effort repair of common user mistakes is attempted before giving up.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
struct LenientRequirement(Requirement);
impl FromStr for LenientRequirement {
    type Err = Pep508Error;

    /// Parses `s` as a [`Requirement`], retrying with missing commas inserted if that fails.
    ///
    /// The unmodified input is always tried first. Only when it is rejected are patched
    /// variants attempted: first with a comma inserted at the first digit/operator boundary,
    /// then with a comma inserted at every such boundary. A warning is logged whenever a
    /// patched variant is accepted.
    ///
    /// # Errors
    ///
    /// Returns the error produced for the unmodified input when neither it nor any patched
    /// variant parses.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let err = match Requirement::from_str(s) {
            Ok(requirement) => return Ok(Self(requirement)),
            Err(err) => err,
        };

        // Given `elasticsearch-dsl (>=7.2.0<8.0.0)`, rewrite to `elasticsearch-dsl (>=7.2.0,<8.0.0)`.
        let first_only = REQUIREMENT_FIXUP_REGEX.replace(s, r"$1,$2");
        if first_only == s {
            // The pattern did not match, so there is nothing to patch.
            return Err(err);
        }

        // A specifier may be missing more than one comma (e.g. `>=1.0<2.0!=1.5`), so also try
        // patching every boundary. The single-replacement candidate stays first so inputs that
        // were previously repaired keep producing the same result.
        let every = REQUIREMENT_FIXUP_REGEX.replace_all(s, r"$1,$2");
        let candidates = if every == first_only {
            vec![first_only]
        } else {
            vec![first_only, every]
        };

        for patched in candidates {
            if let Ok(requirement) = Requirement::from_str(&patched) {
                warn!(
                    "Inserting missing comma into invalid requirement (before: `{s}`; after: `{patched}`)",
                );
                return Ok(Self(requirement));
            }
        }

        Err(err)
    }
}
impl From<LenientRequirement> for Requirement {
fn from(requirement: LenientRequirement) -> Self {
requirement.0
}
}
/// A thin wrapper around [`VersionSpecifiers`] that is parsed leniently: when the input is not a
/// valid set of specifiers, a best-effort repair of common user mistakes is attempted first.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
struct LenientVersionSpecifiers(VersionSpecifiers);
impl FromStr for LenientVersionSpecifiers {
    type Err = Pep440Error;

    /// Parses `s` as [`VersionSpecifiers`], retrying without a trailing wildcard if that fails.
    ///
    /// The unmodified input is always tried first. Only when it is rejected, and it has the
    /// form `>=<release>.*`, is the trailing `.*` dropped and the result parsed again. A
    /// warning is logged whenever the patched input is accepted.
    ///
    /// # Errors
    ///
    /// Returns the error produced for the unmodified input when neither it nor the patched
    /// input parses.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let err = match VersionSpecifiers::from_str(s) {
            Ok(specifiers) => return Ok(Self(specifiers)),
            Err(err) => err,
        };

        // Given `>=3.5.*`, rewrite to `>=3.5`.
        if let Some(patched) = strip_wildcard_from_lower_bound(s) {
            if let Ok(specifier) = VersionSpecifiers::from_str(patched) {
                warn!(
                    "Correcting invalid wildcard bound on version specifier (before: `{s}`; after: `{patched}`)",
                );
                return Ok(Self(specifier));
            }
        }

        Err(err)
    }
}

/// Returns `s` without its trailing `.*` when `s` is exactly `>=` followed by a dotted,
/// all-digit release (e.g. `>=3.5.*` becomes `>=3.5`), and `None` for anything else.
///
/// This replaces a fixed table of `>=3.0.*` through `>=3.12.*`, so other releases such as
/// `>=2.7.*` or `>=3.13.*` are covered while every table entry still maps to the same output.
fn strip_wildcard_from_lower_bound(s: &str) -> Option<&str> {
    let patched = s.strip_suffix(".*")?;
    let release = patched.strip_prefix(">=")?;
    // Every dot-separated segment must be a non-empty run of ASCII digits; this also rejects
    // an empty release, because splitting `""` yields a single empty segment.
    let is_release = release
        .split('.')
        .all(|segment| !segment.is_empty() && segment.bytes().all(|b| b.is_ascii_digit()));
    is_release.then_some(patched)
}
impl From<LenientVersionSpecifiers> for VersionSpecifiers {
fn from(specifiers: LenientVersionSpecifiers) -> Self {
specifiers.0
}
}

View file

@ -12,6 +12,28 @@ use platform_tags::Tags;
use puffin_client::PypiClientBuilder; use puffin_client::PypiClientBuilder;
use puffin_resolver::Resolver; use puffin_resolver::Resolver;
/// Resolves `pylint==2.3.0` and checks that the rendered resolution matches the expected
/// newline-separated list of pinned packages.
#[tokio::test]
async fn pylint() -> Result<()> {
    let expected = [
        "astroid==3.0.1",
        "isort==6.0.0b2",
        "mccabe==0.7.0",
        "pylint==2.3.0",
    ]
    .join("\n");

    let client = PypiClientBuilder::default().build();
    let root = Requirement::from_str("pylint==2.3.0").unwrap();
    let resolver = Resolver::new(vec![root], &MARKERS_311, &TAGS_311, &client);
    let resolution = resolver.resolve().await?;

    assert_eq!(resolution.to_string(), expected);

    Ok(())
}
#[tokio::test] #[tokio::test]
async fn black() -> Result<()> { async fn black() -> Result<()> {
let client = PypiClientBuilder::default().build(); let client = PypiClientBuilder::default().build();
@ -109,7 +131,7 @@ async fn htmldate() -> Result<()> {
"regex==2023.10.3", "regex==2023.10.3",
"six==1.16.0", "six==1.16.0",
"tzlocal==5.1", "tzlocal==5.1",
"urllib3==2.0.6" "urllib3==2.0.7"
] ]
.join("\n") .join("\n")
); );