pep440: rewrite the parser and make version comparisons cheaper (#789)

This PR builds on #780 by making both version parsing faster, and
perhaps more importantly, making version comparisons much faster.
Overall, these changes result in a considerable improvement for the
`boto3.in` workload. Here's the status quo:

```
$ time puffin pip-compile --no-build --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/requirements/boto3.in
Resolved 31 packages in 34.56s

real    34.579
user    34.004
sys     0.413
maxmem  2867 MB
faults  0
```

And now with this PR:

```
$ time puffin pip-compile --no-build --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/requirements/boto3.in
Resolved 31 packages in 9.20s

real    9.218
user    8.919
sys     0.165
maxmem  463 MB
faults  0
```

This particular workload gets stuck in pubgrub doing resolution, and
thus benefits mightily from a faster `Version::cmp` routine. With that
said, this change does also help a fair bit with "normal" runs:

```
$ hyperfine -w10 \
    "puffin-base pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in" \
    "puffin-cmparc pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in"
Benchmark 1: puffin-base pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in
  Time (mean ± σ):     337.5 ms ±   3.9 ms    [User: 310.5 ms, System: 73.2 ms]
  Range (min … max):   333.6 ms … 343.4 ms    10 runs

Benchmark 2: puffin-cmparc pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in
  Time (mean ± σ):     189.8 ms ±   3.0 ms    [User: 168.1 ms, System: 78.4 ms]
  Range (min … max):   185.0 ms … 196.2 ms    15 runs

Summary
  puffin-cmparc pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in ran
    1.78 ± 0.03 times faster than puffin-base pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in
```

There is perhaps some future work here (detailed in the commit
messages), but I suspect it would be more fruitful to explore ways of
making resolution itself and/or deserialization faster.

Fixes #373, Closes #396
This commit is contained in:
Andrew Gallant 2024-01-05 11:57:32 -05:00 committed by GitHub
parent 74777c01ea
commit 6c98ae9d77
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 2103 additions and 487 deletions

View file

@ -5,7 +5,7 @@ use std::str::FromStr;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use pep440_rs::Version;
use pep440_rs::{Version, VersionParseError};
use puffin_normalize::{InvalidNameError, PackageName};
#[derive(Clone, Debug, PartialEq, Eq)]
@ -116,7 +116,7 @@ pub enum SourceDistFilenameError {
#[error("Source distributions filenames must end with .zip or .tar.gz, not {0}")]
InvalidExtension(String),
#[error("Source distribution filename version section is invalid: {0}")]
InvalidVersion(String),
InvalidVersion(VersionParseError),
#[error("Source distribution filename has an invalid package name: {0}")]
InvalidPackageName(String, #[source] InvalidNameError),
}

View file

@ -6,7 +6,7 @@ use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
use thiserror::Error;
use url::Url;
use pep440_rs::Version;
use pep440_rs::{Version, VersionParseError};
use platform_tags::{TagPriority, Tags};
use puffin_normalize::{InvalidNameError, PackageName};
@ -217,7 +217,7 @@ pub enum WheelFilenameError {
#[error("The wheel filename \"{0}\" is invalid: {1}")]
InvalidWheelFileName(String, String),
#[error("The wheel filename \"{0}\" has an invalid version part: {1}")]
InvalidVersion(String, String),
InvalidVersion(String, VersionParseError),
#[error("The wheel filename \"{0}\" has an invalid package name")]
InvalidPackageName(String, InvalidNameError),
}
@ -278,7 +278,7 @@ mod tests {
#[test]
fn err_invalid_version() {
let err = WheelFilename::from_str("foo-x.y.z-python-abi-platform.whl").unwrap_err();
insta::assert_display_snapshot!(err, @r###"The wheel filename "foo-x.y.z-python-abi-platform.whl" has an invalid version part: Version `x.y.z` doesn't match PEP 440 rules"###);
insta::assert_display_snapshot!(err, @r###"The wheel filename "foo-x.y.z-python-abi-platform.whl" has an invalid version part: expected version to start with a number, but no leading ASCII digits were found"###);
}
#[test]