Filter out files with invalid requires python specifiers (#775)

Instead of trying to fix up _all_ the invalid version specifiers on PyPI
and elsewhere, this filters out distributions with invalid
`requires-python` version specifiers that even
`LenientVersionSpecifiers` couldn't parse, as opposed to failing
entirely, which we currently do.

It would be nicer to model this through an invalid-distribution pubgrub type,
together with e.g. source dists with an unknown extension, so that the
version itself still shows up in the error trace.

At the same time, we reduce the log level for fixups from warning to
trace, as they are not actionable for the user.
This commit is contained in:
konsti 2024-01-09 03:46:27 +01:00 committed by GitHub
parent 64da1f0306
commit b1edecdf1f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 89 additions and 23 deletions

View file

@ -3,10 +3,10 @@ use std::str::FromStr;
use once_cell::sync::Lazy;
use regex::Regex;
use serde::{de, Deserialize, Deserializer, Serialize};
use tracing::warn;
use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError};
use pep508_rs::{Pep508Error, Requirement};
use puffin_warnings::warn_user_once;
/// Ex) `>=7.2.0<8.0.0`
static MISSING_COMMA: Lazy<Regex> = Lazy::new(|| Regex::new(r"(\d)([<>=~^!])").unwrap());
@ -62,7 +62,7 @@ fn parse_with_fixups<Err, T: FromStr<Err = Err>>(input: &str, type_name: &str) -
}
if let Ok(requirement) = T::from_str(&patched_input) {
warn_user_once!(
warn!(
"Fixing invalid {type_name} by {} (before: `{input}`; after: `{patched_input}`)",
messages.join(", ")
);

View file

@ -1,9 +1,9 @@
use std::str::FromStr;
use chrono::{DateTime, Utc};
use serde::{de, Deserialize, Deserializer, Serialize};
use serde::{Deserialize, Deserializer, Serialize};
use pep440_rs::VersionSpecifiers;
use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError};
use crate::lenient_requirement::LenientVersionSpecifiers;
@ -23,10 +23,11 @@ pub struct File {
pub dist_info_metadata: Option<DistInfoMetadata>,
pub filename: String,
pub hashes: Hashes,
/// Note: Deserialized with [`LenientVersionSpecifiers`] since there are a number of invalid
/// versions on pypi
/// There are a number of invalid specifiers on pypi, so we first try to parse it into a [`VersionSpecifiers`]
/// according to spec (PEP 440), then a [`LenientVersionSpecifiers`] with fixup for some common problems and if this
/// still fails, we skip the file when creating a version map.
#[serde(default, deserialize_with = "deserialize_version_specifiers_lenient")]
pub requires_python: Option<VersionSpecifiers>,
pub requires_python: Option<Result<VersionSpecifiers, VersionSpecifiersParseError>>,
pub size: Option<usize>,
pub upload_time: Option<DateTime<Utc>>,
pub url: String,
@ -35,7 +36,7 @@ pub struct File {
fn deserialize_version_specifiers_lenient<'de, D>(
deserializer: D,
) -> Result<Option<VersionSpecifiers>, D::Error>
) -> Result<Option<Result<VersionSpecifiers, VersionSpecifiersParseError>>, D::Error>
where
D: Deserializer<'de>,
{
@ -43,8 +44,9 @@ where
let Some(string) = maybe_string else {
return Ok(None);
};
let lenient = LenientVersionSpecifiers::from_str(&string).map_err(de::Error::custom)?;
Ok(Some(lenient.into()))
Ok(Some(
LenientVersionSpecifiers::from_str(&string).map(Into::into),
))
}
#[derive(Debug, Clone, Serialize, Deserialize)]