pep440: rewrite the parser and make version comparisons cheaper (#789)

This PR builds on #780 by making both version parsing faster, and
perhaps more importantly, making version comparisons much faster.
Overall, these changes result in a considerable improvement for the
`boto3.in` workload. Here's the status quo:

```
$ time puffin pip-compile --no-build --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/requirements/boto3.in
Resolved 31 packages in 34.56s

real    34.579
user    34.004
sys     0.413
maxmem  2867 MB
faults  0
```

And now with this PR:

```
$ time puffin pip-compile --no-build --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/requirements/boto3.in
Resolved 31 packages in 9.20s

real    9.218
user    8.919
sys     0.165
maxmem  463 MB
faults  0
```

This particular workload gets stuck in pubgrub doing resolution, and
thus benefits mightily from a faster `Version::cmp` routine. With that
said, this change does also help a fair bit with "normal" runs:

```
$ hyperfine -w10 \
    "puffin-base pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in" \
    "puffin-cmparc pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in"
Benchmark 1: puffin-base pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in
  Time (mean ± σ):     337.5 ms ±   3.9 ms    [User: 310.5 ms, System: 73.2 ms]
  Range (min … max):   333.6 ms … 343.4 ms    10 runs

Benchmark 2: puffin-cmparc pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in
  Time (mean ± σ):     189.8 ms ±   3.0 ms    [User: 168.1 ms, System: 78.4 ms]
  Range (min … max):   185.0 ms … 196.2 ms    15 runs

Summary
  puffin-cmparc pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in ran
    1.78 ± 0.03 times faster than puffin-base pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in
```

There is perhaps some future work here (detailed in the commit
messages), but I suspect it would be more fruitful to explore ways of
making resolution itself and/or deserialization faster.

Fixes #373, Closes #396
This commit is contained in:
Andrew Gallant 2024-01-05 11:57:32 -05:00 committed by GitHub
parent 74777c01ea
commit 6c98ae9d77
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 2103 additions and 487 deletions

View file

@ -913,7 +913,7 @@ mod tests {
use indoc::indoc;
use pep440_rs::{Operator, Version, VersionSpecifier};
use pep440_rs::{Operator, Version, VersionPattern, VersionSpecifier};
use puffin_normalize::{ExtraName, PackageName};
use crate::marker::{
@ -977,11 +977,14 @@ mod tests {
[
VersionSpecifier::new(
Operator::GreaterThanEqual,
Version::new([2, 8, 1]),
false,
VersionPattern::verbatim(Version::new([2, 8, 1])),
)
.unwrap(),
VersionSpecifier::new(
Operator::Equal,
VersionPattern::wildcard(Version::new([2, 8])),
)
.unwrap(),
VersionSpecifier::new(Operator::Equal, Version::new([2, 8]), true).unwrap(),
]
.into_iter()
.collect(),
@ -1114,7 +1117,7 @@ mod tests {
assert_err(
"numpy ( ><1.19 )",
indoc! {"
No such comparison operator '><', must be one of ~= == != <= >= < > ===
no such comparison operator \"><\", must be one of ~= == != <= >= < > ===
numpy ( ><1.19 )
^^^^^^^"
},
@ -1430,7 +1433,7 @@ mod tests {
assert_err(
"name==1.0.org1",
indoc! {"
Version `1.0.org1` doesn't match PEP 440 rules
after parsing 1.0, found \".org1\" after it, which is not part of a valid version
name==1.0.org1
^^^^^^^^^^"
},

View file

@ -10,7 +10,7 @@
//! bogus comparisons with unintended semantics are made.
use crate::{Cursor, Pep508Error, Pep508ErrorSource};
use pep440_rs::{Version, VersionSpecifier};
use pep440_rs::{Version, VersionPattern, VersionSpecifier};
#[cfg(feature = "pyo3")]
use pyo3::{
basic::CompareOp, exceptions::PyValueError, pyclass, pymethods, PyAny, PyResult, Python,
@ -307,7 +307,7 @@ impl FromStr for StringVersion {
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(Self {
string: s.to_string(),
version: Version::from_str(s)?,
version: Version::from_str(s).map_err(|e| e.to_string())?,
})
}
}
@ -559,9 +559,9 @@ impl MarkerExpression {
// The only sound choice for this is `<version key> <version op> <quoted PEP 440 version>`
MarkerValue::MarkerEnvVersion(l_key) => {
let value = &self.r_value;
let (r_version, r_star) = if let MarkerValue::QuotedString(r_string) = &value {
match Version::from_str_star(r_string) {
Ok((version, star)) => (version, star),
let r_vpat = if let MarkerValue::QuotedString(r_string) = &value {
match r_string.parse::<VersionPattern>() {
Ok(vpat) => vpat,
Err(err) => {
reporter(MarkerWarningKind::Pep440Error, format!(
"Expected PEP 440 version to compare with {}, found {}, evaluating to false: {}",
@ -582,14 +582,14 @@ impl MarkerExpression {
None => {
reporter(MarkerWarningKind::Pep440Error, format!(
"Expected PEP 440 version operator to compare {} with '{}', found '{}', evaluating to false",
l_key, r_version, self.operator
l_key, r_vpat.version(), self.operator
), self);
return false;
}
Some(operator) => operator,
};
let specifier = match VersionSpecifier::new(operator, r_version, r_star) {
let specifier = match VersionSpecifier::new(operator, r_vpat) {
Ok(specifier) => specifier,
Err(err) => {
reporter(
@ -660,18 +660,20 @@ impl MarkerExpression {
Some(operator) => operator,
};
let specifier =
match VersionSpecifier::new(operator, r_version.clone(), false) {
Ok(specifier) => specifier,
Err(err) => {
reporter(
MarkerWarningKind::Pep440Error,
format!("Invalid operator/version combination: {err}"),
self,
);
return false;
}
};
let specifier = match VersionSpecifier::new(
operator,
VersionPattern::verbatim(r_version.clone()),
) {
Ok(specifier) => specifier,
Err(err) => {
reporter(
MarkerWarningKind::Pep440Error,
format!("Invalid operator/version combination: {err}"),
self,
);
return false;
}
};
specifier.contains(&l_version)
}
@ -756,10 +758,10 @@ impl MarkerExpression {
// ignore all errors block
(|| {
// The right hand side is allowed to contain a star, e.g. `python_version == '3.*'`
let (r_version, r_star) = Version::from_str_star(r_string).ok()?;
let r_vpat = r_string.parse::<VersionPattern>().ok()?;
let operator = operator.to_pep440_operator()?;
// operator and right hand side make the specifier
let specifier = VersionSpecifier::new(operator, r_version, r_star).ok()?;
let specifier = VersionSpecifier::new(operator, r_vpat).ok()?;
let compatible = python_versions
.iter()
@ -783,7 +785,10 @@ impl MarkerExpression {
let compatible = python_versions.iter().any(|r_version| {
// operator and right hand side make the specifier and in this case the
// right hand is `python_version` so changes every iteration
match VersionSpecifier::new(operator, r_version.clone(), false) {
match VersionSpecifier::new(
operator,
VersionPattern::verbatim(r_version.clone()),
) {
Ok(specifier) => specifier.contains(&l_version),
Err(_) => true,
}
@ -1439,7 +1444,9 @@ mod test {
testing_logger::validate(|captured_logs| {
assert_eq!(
captured_logs[0].body,
"Expected PEP 440 version to compare with python_version, found '3.9.', evaluating to false: Version `3.9.` doesn't match PEP 440 rules"
"Expected PEP 440 version to compare with python_version, found '3.9.', \
evaluating to false: after parsing 3.9, found \".\" after it, \
which is not part of a valid version"
);
assert_eq!(captured_logs[0].level, Level::Warn);
assert_eq!(captured_logs.len(), 1);