pep440: rewrite the parser and make version comparisons cheaper (#789)

This PR builds on #780 by making version parsing faster and,
perhaps more importantly, making version comparisons much faster.
Overall, these changes result in a considerable improvement for the
`boto3.in` workload. Here's the status quo:

```
$ time puffin pip-compile --no-build --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/requirements/boto3.in
Resolved 31 packages in 34.56s

real    34.579
user    34.004
sys     0.413
maxmem  2867 MB
faults  0
```

And now with this PR:

```
$ time puffin pip-compile --no-build --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/requirements/boto3.in
Resolved 31 packages in 9.20s

real    9.218
user    8.919
sys     0.165
maxmem  463 MB
faults  0
```

This particular workload gets stuck in pubgrub doing resolution, and
thus benefits mightily from a faster `Version::cmp` routine. With that
said, this change does also help a fair bit with "normal" runs:

```
$ hyperfine -w10 \
    "puffin-base pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in" \
    "puffin-cmparc pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in"
Benchmark 1: puffin-base pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in
  Time (mean ± σ):     337.5 ms ±   3.9 ms    [User: 310.5 ms, System: 73.2 ms]
  Range (min … max):   333.6 ms … 343.4 ms    10 runs

Benchmark 2: puffin-cmparc pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in
  Time (mean ± σ):     189.8 ms ±   3.0 ms    [User: 168.1 ms, System: 78.4 ms]
  Range (min … max):   185.0 ms … 196.2 ms    15 runs

Summary
  puffin-cmparc pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in ran
    1.78 ± 0.03 times faster than puffin-base pip-compile --cache-dir ~/astral/tmp/cache/ -o /dev/null ./scripts/benchmarks/requirements.in
```

There is perhaps some future work here (detailed in the commit
messages), but I suspect it would be more fruitful to explore ways of
making resolution itself and/or deserialization faster.

Fixes #373, Closes #396
This commit is contained in:
Andrew Gallant 2024-01-05 11:57:32 -05:00 committed by GitHub
parent 74777c01ea
commit 6c98ae9d77
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 2103 additions and 487 deletions

View file

@ -5,7 +5,7 @@ use std::str::FromStr;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use pep440_rs::Version;
use pep440_rs::{Version, VersionParseError};
use puffin_normalize::{InvalidNameError, PackageName};
#[derive(Clone, Debug, PartialEq, Eq)]
@ -116,7 +116,7 @@ pub enum SourceDistFilenameError {
#[error("Source distributions filenames must end with .zip or .tar.gz, not {0}")]
InvalidExtension(String),
#[error("Source distribution filename version section is invalid: {0}")]
InvalidVersion(String),
InvalidVersion(VersionParseError),
#[error("Source distribution filename has an invalid package name: {0}")]
InvalidPackageName(String, #[source] InvalidNameError),
}

View file

@ -6,7 +6,7 @@ use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
use thiserror::Error;
use url::Url;
use pep440_rs::Version;
use pep440_rs::{Version, VersionParseError};
use platform_tags::{TagPriority, Tags};
use puffin_normalize::{InvalidNameError, PackageName};
@ -217,7 +217,7 @@ pub enum WheelFilenameError {
#[error("The wheel filename \"{0}\" is invalid: {1}")]
InvalidWheelFileName(String, String),
#[error("The wheel filename \"{0}\" has an invalid version part: {1}")]
InvalidVersion(String, String),
InvalidVersion(String, VersionParseError),
#[error("The wheel filename \"{0}\" has an invalid package name")]
InvalidPackageName(String, InvalidNameError),
}
@ -278,7 +278,7 @@ mod tests {
#[test]
fn err_invalid_version() {
let err = WheelFilename::from_str("foo-x.y.z-python-abi-platform.whl").unwrap_err();
insta::assert_display_snapshot!(err, @r###"The wheel filename "foo-x.y.z-python-abi-platform.whl" has an invalid version part: Version `x.y.z` doesn't match PEP 440 rules"###);
insta::assert_display_snapshot!(err, @r###"The wheel filename "foo-x.y.z-python-abi-platform.whl" has an invalid version part: expected version to start with a number, but no leading ASCII digits were found"###);
}
#[test]

View file

@ -12,10 +12,6 @@
//! assert!(version_specifiers.iter().all(|specifier| specifier.contains(&version)));
//! ```
//!
//! One thing that's a bit awkward about the API is that there's two kinds of
//! [Version]: One that doesn't allow stars (i.e. a package version), and one that does
//! (i.e. a version in a specifier), but they both use the same struct.
//!
//! The error handling and diagnostics are a bit overdone because this is my parser-and-diagnostics
//! learning project (which kinda failed because the byte based regex crate and char-based
//! diagnostics don't mix well)
@ -43,7 +39,10 @@
#![deny(missing_docs)]
pub use {
version::{LocalSegment, Operator, PreRelease, Version},
version::{
LocalSegment, Operator, OperatorParseError, PreRelease, Version, VersionParseError,
VersionPattern, VersionPatternParseError,
},
version_specifier::{
parse_version_specifiers, VersionSpecifier, VersionSpecifiers, VersionSpecifiersParseError,
},

File diff suppressed because it is too large Load diff

View file

@ -14,7 +14,9 @@ use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
#[cfg(feature = "pyo3")]
use crate::version::PyVersion;
use crate::{version, Operator, Version};
use crate::{
version, Operator, OperatorParseError, Version, VersionPattern, VersionPatternParseError,
};
/// A thin wrapper around `Vec<VersionSpecifier>` with a serde implementation
///
@ -323,11 +325,12 @@ impl VersionSpecifier {
/// parameter indicates a trailing `.*`, to differentiate between `1.1.*` and `1.1`
pub fn new(
operator: Operator,
version: Version,
star: bool,
version_pattern: VersionPattern,
) -> Result<Self, VersionSpecifierBuildError> {
let star = version_pattern.is_wildcard();
let version = version_pattern.into_version();
// "Local version identifiers are NOT permitted in this version specifier."
if version.local().is_some() && !operator.is_local_compatible() {
if version.is_local() && !operator.is_local_compatible() {
return Err(BuildErrorKind::OperatorLocalCombo { operator, version }.into());
}
@ -391,7 +394,7 @@ impl VersionSpecifier {
// "Except where specifically noted below, local version identifiers MUST NOT be permitted
// in version specifiers, and local version labels MUST be ignored entirely when checking
// if candidate versions match a given version specifier."
let (this, other) = if self.version.local().is_some() {
let (this, other) = if !self.version.local().is_empty() {
(self.version.clone(), version.clone())
} else {
// self is already without local
@ -524,10 +527,9 @@ impl FromStr for VersionSpecifier {
if version.is_empty() {
return Err(ParseErrorKind::MissingVersion.into());
}
let (version, star) =
Version::from_str_star(version).map_err(ParseErrorKind::InvalidVersion)?;
let version_specifier = VersionSpecifier::new(operator, version, star)
.map_err(ParseErrorKind::InvalidSpecifier)?;
let vpat = version.parse().map_err(ParseErrorKind::InvalidVersion)?;
let version_specifier =
VersionSpecifier::new(operator, vpat).map_err(ParseErrorKind::InvalidSpecifier)?;
s.eat_while(|c: char| c.is_whitespace());
if !s.done() {
return Err(ParseErrorKind::InvalidTrailing(s.after().to_string()).into());
@ -565,7 +567,6 @@ impl std::fmt::Display for VersionSpecifierBuildError {
let local = version
.local()
.iter()
.flat_map(|segments| segments.iter())
.map(|segment| segment.to_string())
.collect::<Vec<String>>()
.join(".");
@ -661,8 +662,8 @@ impl std::fmt::Display for VersionSpecifierParseError {
/// specifier from a string.
#[derive(Clone, Debug, Eq, PartialEq)]
enum ParseErrorKind {
InvalidOperator(String),
InvalidVersion(String),
InvalidOperator(OperatorParseError),
InvalidVersion(VersionPatternParseError),
InvalidSpecifier(VersionSpecifierBuildError),
MissingOperator,
MissingVersion,
@ -726,7 +727,7 @@ mod tests {
use indoc::indoc;
use crate::LocalSegment;
use crate::{LocalSegment, PreRelease};
use super::*;
@ -1100,12 +1101,14 @@ mod tests {
("2.0.5", ">2.0dev"),
];
for (version, specifier) in pairs {
for (s_version, s_spec) in pairs {
let version = s_version.parse::<Version>().unwrap();
let spec = s_spec.parse::<VersionSpecifier>().unwrap();
assert!(
VersionSpecifier::from_str(specifier)
.unwrap()
.contains(&Version::from_str(version).unwrap()),
"{version} {specifier}"
spec.contains(&version),
"{s_version} {s_spec}\nversion repr: {:?}\nspec version repr: {:?}",
version.as_bloated_debug(),
spec.version.as_bloated_debug(),
);
}
}
@ -1255,10 +1258,8 @@ mod tests {
let result = VersionSpecifiers::from_str("== 0.9.*.1");
assert_eq!(
result.unwrap_err().inner.err,
ParseErrorKind::InvalidVersion(
"Version `0.9.*.1` doesn't match PEP 440 rules".to_string()
)
.into()
ParseErrorKind::InvalidVersion(version::PatternErrorKind::WildcardNotTrailing.into())
.into(),
);
}
@ -1295,10 +1296,9 @@ mod tests {
// Invalid operator
(
"=>2.0",
ParseErrorKind::InvalidOperator(
"No such comparison operator '=>', must be one of ~= == != <= >= < > ==="
.to_string(),
)
ParseErrorKind::InvalidOperator(OperatorParseError {
got: "=>".to_string(),
})
.into(),
),
// Version-less specifier
@ -1419,14 +1419,14 @@ mod tests {
(
"==1.0.*+5",
ParseErrorKind::InvalidVersion(
"Version `1.0.*+5` doesn't match PEP 440 rules".to_string(),
version::PatternErrorKind::WildcardNotTrailing.into(),
)
.into(),
),
(
"!=1.0.*+deadbeef",
ParseErrorKind::InvalidVersion(
"Version `1.0.*+deadbeef` doesn't match PEP 440 rules".to_string(),
version::PatternErrorKind::WildcardNotTrailing.into(),
)
.into(),
),
@ -1435,56 +1435,80 @@ mod tests {
(
"==2.0a1.*",
ParseErrorKind::InvalidVersion(
"You can't have both a trailing `.*` and a prerelease version".to_string(),
version::ErrorKind::UnexpectedEnd {
version: Version::new([2, 0]).with_pre(Some((PreRelease::Alpha, 1))),
remaining: ".*".to_string(),
}
.into(),
)
.into(),
),
(
"!=2.0a1.*",
ParseErrorKind::InvalidVersion(
"You can't have both a trailing `.*` and a prerelease version".to_string(),
version::ErrorKind::UnexpectedEnd {
version: Version::new([2, 0]).with_pre(Some((PreRelease::Alpha, 1))),
remaining: ".*".to_string(),
}
.into(),
)
.into(),
),
(
"==2.0.post1.*",
ParseErrorKind::InvalidVersion(
"You can't have both a trailing `.*` and a post version".to_string(),
version::ErrorKind::UnexpectedEnd {
version: Version::new([2, 0]).with_post(Some(1)),
remaining: ".*".to_string(),
}
.into(),
)
.into(),
),
(
"!=2.0.post1.*",
ParseErrorKind::InvalidVersion(
"You can't have both a trailing `.*` and a post version".to_string(),
version::ErrorKind::UnexpectedEnd {
version: Version::new([2, 0]).with_post(Some(1)),
remaining: ".*".to_string(),
}
.into(),
)
.into(),
),
(
"==2.0.dev1.*",
ParseErrorKind::InvalidVersion(
"You can't have both a trailing `.*` and a dev version".to_string(),
version::ErrorKind::UnexpectedEnd {
version: Version::new([2, 0]).with_dev(Some(1)),
remaining: ".*".to_string(),
}
.into(),
)
.into(),
),
(
"!=2.0.dev1.*",
ParseErrorKind::InvalidVersion(
"You can't have both a trailing `.*` and a dev version".to_string(),
version::ErrorKind::UnexpectedEnd {
version: Version::new([2, 0]).with_dev(Some(1)),
remaining: ".*".to_string(),
}
.into(),
)
.into(),
),
(
"==1.0+5.*",
ParseErrorKind::InvalidVersion(
"You can't have both a trailing `.*` and a local version".to_string(),
version::ErrorKind::LocalEmpty { precursor: '.' }.into(),
)
.into(),
),
(
"!=1.0+deadbeef.*",
ParseErrorKind::InvalidVersion(
"You can't have both a trailing `.*` and a local version".to_string(),
version::ErrorKind::LocalEmpty { precursor: '.' }.into(),
)
.into(),
),
@ -1492,7 +1516,7 @@ mod tests {
(
"==1.0.*.5",
ParseErrorKind::InvalidVersion(
"Version `1.0.*.5` doesn't match PEP 440 rules".to_string(),
version::PatternErrorKind::WildcardNotTrailing.into(),
)
.into(),
),
@ -1505,14 +1529,22 @@ mod tests {
(
"==1.0.dev1.*",
ParseErrorKind::InvalidVersion(
"You can't have both a trailing `.*` and a dev version".to_string(),
version::ErrorKind::UnexpectedEnd {
version: Version::new([1, 0]).with_dev(Some(1)),
remaining: ".*".to_string(),
}
.into(),
)
.into(),
),
(
"!=1.0.dev1.*",
ParseErrorKind::InvalidVersion(
"You can't have both a trailing `.*` and a dev version".to_string(),
version::ErrorKind::UnexpectedEnd {
version: Version::new([1, 0]).with_dev(Some(1)),
remaining: ".*".to_string(),
}
.into(),
)
.into(),
),
@ -1625,7 +1657,8 @@ Failed to parse version: Unexpected end of version specifier, expected operator:
};
let op = Operator::TildeEqual;
let v = Version::new([5]);
assert_eq!(err, VersionSpecifier::new(op, v, false).unwrap_err());
let vpat = VersionPattern::verbatim(v);
assert_eq!(err, VersionSpecifier::new(op, vpat).unwrap_err());
assert_eq!(
err.to_string(),
"The ~= operator requires at least two segments in the release version"

View file

@ -913,7 +913,7 @@ mod tests {
use indoc::indoc;
use pep440_rs::{Operator, Version, VersionSpecifier};
use pep440_rs::{Operator, Version, VersionPattern, VersionSpecifier};
use puffin_normalize::{ExtraName, PackageName};
use crate::marker::{
@ -977,11 +977,14 @@ mod tests {
[
VersionSpecifier::new(
Operator::GreaterThanEqual,
Version::new([2, 8, 1]),
false,
VersionPattern::verbatim(Version::new([2, 8, 1])),
)
.unwrap(),
VersionSpecifier::new(
Operator::Equal,
VersionPattern::wildcard(Version::new([2, 8])),
)
.unwrap(),
VersionSpecifier::new(Operator::Equal, Version::new([2, 8]), true).unwrap(),
]
.into_iter()
.collect(),
@ -1114,7 +1117,7 @@ mod tests {
assert_err(
"numpy ( ><1.19 )",
indoc! {"
No such comparison operator '><', must be one of ~= == != <= >= < > ===
no such comparison operator \"><\", must be one of ~= == != <= >= < > ===
numpy ( ><1.19 )
^^^^^^^"
},
@ -1430,7 +1433,7 @@ mod tests {
assert_err(
"name==1.0.org1",
indoc! {"
Version `1.0.org1` doesn't match PEP 440 rules
after parsing 1.0, found \".org1\" after it, which is not part of a valid version
name==1.0.org1
^^^^^^^^^^"
},

View file

@ -10,7 +10,7 @@
//! bogus comparisons with unintended semantics are made.
use crate::{Cursor, Pep508Error, Pep508ErrorSource};
use pep440_rs::{Version, VersionSpecifier};
use pep440_rs::{Version, VersionPattern, VersionSpecifier};
#[cfg(feature = "pyo3")]
use pyo3::{
basic::CompareOp, exceptions::PyValueError, pyclass, pymethods, PyAny, PyResult, Python,
@ -307,7 +307,7 @@ impl FromStr for StringVersion {
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(Self {
string: s.to_string(),
version: Version::from_str(s)?,
version: Version::from_str(s).map_err(|e| e.to_string())?,
})
}
}
@ -559,9 +559,9 @@ impl MarkerExpression {
// The only sound choice for this is `<version key> <version op> <quoted PEP 440 version>`
MarkerValue::MarkerEnvVersion(l_key) => {
let value = &self.r_value;
let (r_version, r_star) = if let MarkerValue::QuotedString(r_string) = &value {
match Version::from_str_star(r_string) {
Ok((version, star)) => (version, star),
let r_vpat = if let MarkerValue::QuotedString(r_string) = &value {
match r_string.parse::<VersionPattern>() {
Ok(vpat) => vpat,
Err(err) => {
reporter(MarkerWarningKind::Pep440Error, format!(
"Expected PEP 440 version to compare with {}, found {}, evaluating to false: {}",
@ -582,14 +582,14 @@ impl MarkerExpression {
None => {
reporter(MarkerWarningKind::Pep440Error, format!(
"Expected PEP 440 version operator to compare {} with '{}', found '{}', evaluating to false",
l_key, r_version, self.operator
l_key, r_vpat.version(), self.operator
), self);
return false;
}
Some(operator) => operator,
};
let specifier = match VersionSpecifier::new(operator, r_version, r_star) {
let specifier = match VersionSpecifier::new(operator, r_vpat) {
Ok(specifier) => specifier,
Err(err) => {
reporter(
@ -660,18 +660,20 @@ impl MarkerExpression {
Some(operator) => operator,
};
let specifier =
match VersionSpecifier::new(operator, r_version.clone(), false) {
Ok(specifier) => specifier,
Err(err) => {
reporter(
MarkerWarningKind::Pep440Error,
format!("Invalid operator/version combination: {err}"),
self,
);
return false;
}
};
let specifier = match VersionSpecifier::new(
operator,
VersionPattern::verbatim(r_version.clone()),
) {
Ok(specifier) => specifier,
Err(err) => {
reporter(
MarkerWarningKind::Pep440Error,
format!("Invalid operator/version combination: {err}"),
self,
);
return false;
}
};
specifier.contains(&l_version)
}
@ -756,10 +758,10 @@ impl MarkerExpression {
// ignore all errors block
(|| {
// The right hand side is allowed to contain a star, e.g. `python_version == '3.*'`
let (r_version, r_star) = Version::from_str_star(r_string).ok()?;
let r_vpat = r_string.parse::<VersionPattern>().ok()?;
let operator = operator.to_pep440_operator()?;
// operator and right hand side make the specifier
let specifier = VersionSpecifier::new(operator, r_version, r_star).ok()?;
let specifier = VersionSpecifier::new(operator, r_vpat).ok()?;
let compatible = python_versions
.iter()
@ -783,7 +785,10 @@ impl MarkerExpression {
let compatible = python_versions.iter().any(|r_version| {
// operator and right hand side make the specifier and in this case the
// right hand is `python_version` so changes every iteration
match VersionSpecifier::new(operator, r_version.clone(), false) {
match VersionSpecifier::new(
operator,
VersionPattern::verbatim(r_version.clone()),
) {
Ok(specifier) => specifier.contains(&l_version),
Err(_) => true,
}
@ -1439,7 +1444,9 @@ mod test {
testing_logger::validate(|captured_logs| {
assert_eq!(
captured_logs[0].body,
"Expected PEP 440 version to compare with python_version, found '3.9.', evaluating to false: Version `3.9.` doesn't match PEP 440 rules"
"Expected PEP 440 version to compare with python_version, found '3.9.', \
evaluating to false: after parsing 3.9, found \".\" after it, \
which is not part of a valid version"
);
assert_eq!(captured_logs[0].level, Level::Warn);
assert_eq!(captured_logs.len(), 1);

View file

@ -701,7 +701,7 @@ fn compile_python_invalid_version() -> Result<()> {
----- stdout -----
----- stderr -----
error: invalid value '3.7.x' for '--python-version <PYTHON_VERSION>': Version `3.7.x` doesn't match PEP 440 rules
error: invalid value '3.7.x' for '--python-version <PYTHON_VERSION>': after parsing 3.7, found ".x" after it, which is not part of a valid version
For more information, try '--help'.
"###);

View file

@ -49,7 +49,7 @@ fn invalid_requirement() -> Result<()> {
----- stderr -----
error: Failed to parse `flask==1.0.x`
Caused by: Version `1.0.x` doesn't match PEP 440 rules
Caused by: after parsing 1.0, found ".x" after it, which is not part of a valid version
flask==1.0.x
^^^^^^^
"###);
@ -96,7 +96,7 @@ fn invalid_requirements_txt_requirement() -> Result<()> {
----- stderr -----
error: Couldn't parse requirement in requirements.txt position 0 to 12
Caused by: Version `1.0.x` doesn't match PEP 440 rules
Caused by: after parsing 1.0, found ".x" after it, which is not part of a valid version
flask==1.0.x
^^^^^^^
"###);
@ -210,7 +210,7 @@ dependencies = ["flask==1.0.x"]
|
3 | dependencies = ["flask==1.0.x"]
| ^^^^^^^^^^^^^^^^
Version `1.0.x` doesn't match PEP 440 rules
after parsing 1.0, found ".x" after it, which is not part of a valid version
flask==1.0.x
^^^^^^^

View file

@ -7,7 +7,7 @@ use mailparse::{MailHeaderMap, MailParseError};
use serde::{Deserialize, Serialize};
use thiserror::Error;
use pep440_rs::{Version, VersionSpecifiers, VersionSpecifiersParseError};
use pep440_rs::{Version, VersionParseError, VersionSpecifiers, VersionSpecifiersParseError};
use pep508_rs::{Pep508Error, Requirement};
use puffin_normalize::{ExtraName, InvalidNameError, PackageName};
@ -60,7 +60,7 @@ pub enum Error {
MultipleMetadataFiles(Vec<String>),
/// Invalid Version
#[error("invalid version: {0}")]
Pep440VersionError(String),
Pep440VersionError(VersionParseError),
/// Invalid VersionSpecifier
#[error(transparent)]
Pep440Error(#[from] VersionSpecifiersParseError),