distribution-filename: speed up is_compatible (#367)

This PR tweaks the representation of `Tags` in order to offer a
faster implementation of `WheelFilename::is_compatible`. We now use a
nested map of tags that lets us avoid looping over every supported
platform tag. As the code comments suggest, that is the essential gain.
We still do not mind looping over the tags in each wheel name since they
tend to be quite small. And pushing our thumb on that side of things can
make things worse overall since it would likely slow down WheelFilename
construction itself.

For micro-benchmarks, we improve considerably for compatibility
checking:

    $ critcmp base test3
    group                                                 base                                    test3
    -----                                                 ----                                    -----
    build_platform_tags/burntsushi-archlinux              1.00    46.2±0.28µs      ? ?/sec        2.48  114.8±0.45µs   ? ?/sec
    wheelname_parsing/flyte-long-compatible               1.00    624.8±3.31ns     174.0 MB/sec   1.01  629.4±4.30ns   172.7 MB/sec
    wheelname_parsing/flyte-long-incompatible             1.00    743.6±4.23ns     165.4 MB/sec   1.00  746.9±4.62ns   164.7 MB/sec
    wheelname_parsing/flyte-short-compatible              1.00    526.7±4.76ns     54.3 MB/sec    1.01  530.2±5.81ns   54.0 MB/sec
    wheelname_parsing/flyte-short-incompatible            1.00    540.4±4.93ns     60.0 MB/sec    1.01  545.7±5.31ns   59.4 MB/sec
    wheelname_parsing_failure/flyte-long-extension        1.00    13.6±0.13ns      3.2 GB/sec     1.01  13.7±0.14ns    3.2 GB/sec
    wheelname_parsing_failure/flyte-short-extension       1.00    14.0±0.20ns      1160.4 MB/sec  1.01  14.1±0.14ns    1146.5 MB/sec
    wheelname_tag_compatibility/flyte-long-compatible     11.33   159.8±2.79ns     680.5 MB/sec   1.00  14.1±0.23ns    7.5 GB/sec
    wheelname_tag_compatibility/flyte-long-incompatible   237.60  1671.8±37.99ns   73.6 MB/sec    1.00  7.0±0.08ns     17.1 GB/sec
    wheelname_tag_compatibility/flyte-short-compatible    16.07   223.5±8.60ns     128.0 MB/sec   1.00  13.9±0.30ns    2.0 GB/sec
    wheelname_tag_compatibility/flyte-short-incompatible  149.83  628.3±2.13ns     51.6 MB/sec    1.00  4.2±0.10ns     7.6 GB/sec

We do regress slightly on the time it takes for `Tags::new` to run, but
this is somewhat expected. And in absolute terms, 114us is perfectly
acceptable given that it's only executed ~once for each `puffin`
invocation.

Ad hoc benchmarks indicate an overall 25% perf improvement in `puffin
pip-compile` times. This roughly corresponds with how much time
`is_compatible` was taking. Indeed, profiling confirms that it has
virtually disappeared from the profile.

Fixes #157
This commit is contained in:
Andrew Gallant 2023-11-09 09:01:03 -05:00 committed by GitHub
parent bdb89b4072
commit 33c0901a28
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 571 additions and 28 deletions

127
Cargo.lock generated
View file

@ -83,6 +83,12 @@ dependencies = [
"libc",
]
[[package]]
name = "anes"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]]
name = "anstream"
version = "0.6.4"
@ -225,7 +231,7 @@ dependencies = [
"bisection",
"futures",
"http-content-range",
"itertools",
"itertools 0.11.0",
"memmap2 0.9.0",
"reqwest",
"thiserror",
@ -293,6 +299,15 @@ version = "0.21.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9"
[[package]]
name = "bench"
version = "0.0.0"
dependencies = [
"criterion",
"distribution-filename",
"platform-tags",
]
[[package]]
name = "bincode"
version = "1.3.3"
@ -443,6 +458,12 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "cast"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
version = "1.0.83"
@ -497,6 +518,33 @@ dependencies = [
"stacker",
]
[[package]]
name = "ciborium"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926"
dependencies = [
"ciborium-io",
"ciborium-ll",
"serde",
]
[[package]]
name = "ciborium-io"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656"
[[package]]
name = "ciborium-ll"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b"
dependencies = [
"ciborium-io",
"half",
]
[[package]]
name = "clap"
version = "4.4.7"
@ -627,6 +675,40 @@ dependencies = [
"cfg-if 1.0.0",
]
[[package]]
name = "criterion"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
dependencies = [
"anes",
"cast",
"ciborium",
"clap",
"criterion-plot",
"is-terminal",
"itertools 0.10.5",
"num-traits",
"once_cell",
"oorandom",
"regex",
"serde",
"serde_derive",
"serde_json",
"tinytemplate",
"walkdir",
]
[[package]]
name = "criterion-plot"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
dependencies = [
"cast",
"itertools 0.10.5",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.3"
@ -775,6 +857,7 @@ dependencies = [
name = "distribution-filename"
version = "0.0.1"
dependencies = [
"insta",
"pep440_rs 0.3.12",
"platform-tags",
"puffin-normalize",
@ -1121,6 +1204,12 @@ dependencies = [
"tracing",
]
[[package]]
name = "half"
version = "1.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
[[package]]
name = "hashbrown"
version = "0.12.3"
@ -1489,6 +1578,15 @@ version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "616cde7c720bb2bb5824a224687d8f77bfd38922027f01d825cd7453be5099fb"
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.11.0"
@ -1808,6 +1906,12 @@ version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "oorandom"
version = "11.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
[[package]]
name = "openssl-probe"
version = "0.1.5"
@ -2038,6 +2142,7 @@ dependencies = [
name = "platform-tags"
version = "0.0.1"
dependencies = [
"fxhash",
"platform-host",
]
@ -2082,7 +2187,7 @@ dependencies = [
"anstyle",
"difflib",
"float-cmp",
"itertools",
"itertools 0.11.0",
"normalize-line-endings",
"predicates-core",
"regex",
@ -2224,7 +2329,7 @@ dependencies = [
"insta",
"insta-cmd",
"install-wheel-rs",
"itertools",
"itertools 0.11.0",
"miette",
"pep440_rs 0.3.12",
"pep508_rs",
@ -2295,7 +2400,7 @@ dependencies = [
"futures",
"gourgeist",
"indicatif",
"itertools",
"itertools 0.11.0",
"pep508_rs",
"platform-host",
"platform-tags",
@ -2320,7 +2425,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"gourgeist",
"itertools",
"itertools 0.11.0",
"pep508_rs",
"platform-host",
"platform-tags",
@ -2443,7 +2548,7 @@ dependencies = [
"gourgeist",
"insta",
"install-wheel-rs",
"itertools",
"itertools 0.11.0",
"once_cell",
"pep440_rs 0.3.12",
"pep508_rs",
@ -3466,6 +3571,16 @@ dependencies = [
"crunchy",
]
[[package]]
name = "tinytemplate"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
dependencies = [
"serde",
"serde_json",
]
[[package]]
name = "tinyvec"
version = "1.6.0"

26
crates/bench/Cargo.toml Normal file
View file

@ -0,0 +1,26 @@
[package]
name = "bench"
version = "0.0.0"
description = "Puffin Micro-benchmarks"
publish = false
authors = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
homepage = { workspace = true }
documentation = { workspace = true }
repository = { workspace = true }
license = { workspace = true }
[lib]
bench = false
[[bench]]
name = "distribution-filename"
path = "benches/distribution_filename.rs"
harness = false
[dependencies]
criterion = { version = "0.5.1", default-features = false }
distribution-filename.path = "../distribution-filename"
platform-tags.path = "../platform-tags"

View file

@ -0,0 +1,162 @@
use {distribution_filename::WheelFilename, platform_tags::Tags};
use bench::criterion::{
criterion_group, criterion_main, measurement::WallTime, BenchmarkId, Criterion, Throughput,
};
/// A set of platform tags extracted from burntsushi's Archlinux workstation.
/// We could just re-create these via `Tags::from_env`, but those might differ
/// depending on the platform. This way, we always use the same data. It also
/// lets us assert tag compatibility regardless of where the benchmarks run.
///
/// Each entry is a `(python_tag, abi_tag, platform_tag)` triple, which is the
/// order in which the benchmarks below destructure it before handing the
/// tags to `Tags::new`.
const PLATFORM_TAGS: &[(&str, &str, &str)] = include!("../inputs/platform_tags.rs");
/// A set of wheel names used in the benchmarks below. We pick short and long
/// names, as well as names that are compatible and names that are not
/// compatible with `PLATFORM_TAGS`.
///
/// The tuple is (name, filename, compatible) where `name` is a descriptive
/// name for humans used in the benchmark definition. And `filename` is the
/// actual wheel filename we want to benchmark operations on. And `compatible`
/// indicates whether the tags in the wheel filename are expected to be
/// compatible with the tags in `PLATFORM_TAGS`.
const WHEEL_NAMES: &[(&str, &str, bool)] = &[
// This tests a case with a very short name that is *not* compatible
// with PLATFORM_TAGS. It only uses one tag for each component (one
// Python version, one ABI and one platform).
(
"flyte-short-incompatible",
"hypothesis-4.24.5-py2-none-any.whl",
false,
),
// This tests a case with a very short name that *is* compatible with
// PLATFORM_TAGS. It only uses one tag for each component (one Python
// version, one ABI and one platform).
(
"flyte-short-compatible",
"ipython-2.1.0-py3-none-any.whl",
true,
),
// This tests a case with a long name that is *not* compatible. That
// is, all platform tags need to be checked against the tags in the
// wheel filename. This is essentially the worst possible practical
// case.
(
"flyte-long-incompatible",
"protobuf-3.5.2.post1-cp36-cp36m-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl",
false,
),
// This tests a case with a long name that *is* compatible. We
// expect this to be (on average) quicker because the compatibility
// check stops as soon as a positive match is found. (Whereas the
// incompatible case needs to check all tags.)
(
"flyte-long-compatible",
"coverage-6.6.0b1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
true,
),
];
/// A list of names that are candidates for wheel filenames but will ultimately
/// fail to parse.
///
/// The tuple is (name, filename) where `name` is a descriptive name for
/// humans used in the benchmark definition and `filename` is the input that
/// `WheelFilename` parsing is expected to reject.
const INVALID_WHEEL_NAMES: &[(&str, &str)] = &[
("flyte-short-extension", "mock-5.1.0.tar.gz"),
(
"flyte-long-extension",
"Pillow-5.4.0.dev0-py3.7-macosx-10.13-x86_64.egg",
),
];
/// Benchmarks the construction of platform tags.
///
/// This only happens ~once per program startup. Originally, construction was
/// trivial. But to speed up `WheelFilename::is_compatible`, we added some
/// extra processing. We thus expect construction to become slower, but we
/// write a benchmark to ensure it is still "reasonable."
fn benchmark_build_platform_tags(c: &mut Criterion<WallTime>) {
let tags: Vec<(String, String, String)> = PLATFORM_TAGS
.iter()
.map(|&(py, abi, plat)| (py.to_string(), abi.to_string(), plat.to_string()))
.collect();
let mut group = c.benchmark_group("build_platform_tags");
group.bench_function(BenchmarkId::from_parameter("burntsushi-archlinux"), |b| {
b.iter(|| std::hint::black_box(Tags::new(tags.clone())));
});
group.finish();
}
/// Benchmarks `WheelFilename::from_str`. This has been observed to take some
/// non-trivial time in profiling (although, at time of writing, not as much
/// as tag compatibility). In the process of optimizing tag compatibility,
/// we tweaked wheel filename parsing. This benchmark was therefore added to
/// ensure we didn't regress here.
///
/// Throughput is reported in bytes of filename parsed per second.
fn benchmark_wheelname_parsing(c: &mut Criterion<WallTime>) {
    let mut group = c.benchmark_group("wheelname_parsing");
    for (name, filename, _) in WHEEL_NAMES.iter().copied() {
        let len = u64::try_from(filename.len()).expect("length fits in u64");
        group.throughput(Throughput::Bytes(len));
        group.bench_function(BenchmarkId::from_parameter(name), |b| {
            b.iter(|| {
                // The input is a compile-time constant, so hide it from the
                // optimizer. The parsed value is returned (not discarded) so
                // that criterion black-boxes it and the parse cannot be
                // elided as dead code.
                std::hint::black_box(filename)
                    .parse::<WheelFilename>()
                    .expect("valid wheel filename")
            });
        });
    }
    group.finish();
}
/// Benchmarks `WheelFilename::from_str` when it fails. This routine is called
/// on every filename in a package's metadata. A non-trivial portion of which
/// are not wheel filenames. Ensuring that the error path is fast is thus
/// probably a good idea.
///
/// Throughput is reported in bytes of filename examined per second.
fn benchmark_wheelname_parsing_failure(c: &mut Criterion<WallTime>) {
    let mut group = c.benchmark_group("wheelname_parsing_failure");
    for (name, filename) in INVALID_WHEEL_NAMES.iter().copied() {
        let len = u64::try_from(filename.len()).expect("length fits in u64");
        group.throughput(Throughput::Bytes(len));
        group.bench_function(BenchmarkId::from_parameter(name), |b| {
            b.iter(|| {
                // Hide the constant input from the optimizer and return the
                // error so that criterion black-boxes it; otherwise the
                // rejected parse could be optimized out entirely.
                std::hint::black_box(filename)
                    .parse::<WheelFilename>()
                    .expect_err("invalid wheel filename")
            });
        });
    }
    group.finish();
}
/// Measures `WheelFilename::is_compatible` against the fixed `PLATFORM_TAGS`
/// set.
///
/// This check was found to be the #1 bottleneck in the resolver: the set of
/// platform tags (built once) is large, and the original implementation
/// scanned all of it for each tag in the wheel filename. Each wheel name is
/// parsed up front, outside the timed closure, and the result of the
/// compatibility check is asserted against the expectation recorded in
/// `WHEEL_NAMES`.
fn benchmark_wheelname_tag_compatibility(c: &mut Criterion<WallTime>) {
    let platform = Tags::new(
        PLATFORM_TAGS
            .iter()
            .map(|&(py, abi, plat)| (py.to_string(), abi.to_string(), plat.to_string()))
            .collect::<Vec<(String, String, String)>>(),
    );

    let mut group = c.benchmark_group("wheelname_tag_compatibility");
    for &(label, raw, want) in WHEEL_NAMES {
        let parsed = raw.parse::<WheelFilename>().expect("valid wheel filename");
        let bytes = u64::try_from(raw.len()).expect("length fits in u64");
        group.throughput(Throughput::Bytes(bytes));
        group.bench_function(BenchmarkId::from_parameter(label), |bencher| {
            bencher.iter(|| {
                assert_eq!(want, parsed.is_compatible(&platform));
            });
        });
    }
    group.finish();
}
// Collect every benchmark function defined above into a single criterion
// group named `distribution_filename`, then have criterion generate the
// entry point that runs that group.
criterion_group!(
distribution_filename,
benchmark_build_platform_tags,
benchmark_wheelname_parsing,
benchmark_wheelname_parsing_failure,
benchmark_wheelname_tag_compatibility,
);
criterion_main!(distribution_filename);

File diff suppressed because one or more lines are too long

7
crates/bench/src/lib.rs Normal file
View file

@ -0,0 +1,7 @@
pub mod criterion {
//! This module re-exports the criterion API unconditionally for now. It's
//! intended that in the future this be a way to switch the backend to
//! something else (like codspeed).
//!
//! Benchmarks should import criterion items through this module rather than
//! from the `criterion` crate directly, so that such a switch only has to
//! happen here.
pub use criterion::*;
}

View file

@ -16,3 +16,6 @@ puffin-normalize = { path = "../puffin-normalize" }
thiserror = { workspace = true }
url = { workspace = true }
[dev-dependencies]
insta = { version = "1.34.0" }

View file

@ -0,0 +1,32 @@
---
source: crates/distribution-filename/src/wheel.rs
expression: "WheelFilename::from_str(\"foo-1.2.3-build-python-abi-platform.whl\")"
---
Ok(
WheelFilename {
distribution: PackageName(
"foo",
),
version: Version {
epoch: 0,
release: [
1,
2,
3,
],
pre: None,
post: None,
dev: None,
local: None,
},
python_tag: [
"python",
],
abi_tag: [
"abi",
],
platform_tag: [
"platform",
],
},
)

View file

@ -0,0 +1,39 @@
---
source: crates/distribution-filename/src/wheel.rs
expression: "WheelFilename::from_str(\"foo-1.2.3-ab.cd.ef-gh-ij.kl.mn.op.qr.st.whl\")"
---
Ok(
WheelFilename {
distribution: PackageName(
"foo",
),
version: Version {
epoch: 0,
release: [
1,
2,
3,
],
pre: None,
post: None,
dev: None,
local: None,
},
python_tag: [
"ab",
"cd",
"ef",
],
abi_tag: [
"gh",
],
platform_tag: [
"ij",
"kl",
"mn",
"op",
"qr",
"st",
],
},
)

View file

@ -0,0 +1,32 @@
---
source: crates/distribution-filename/src/wheel.rs
expression: "WheelFilename::from_str(\"foo-1.2.3-foo-bar-baz.whl\")"
---
Ok(
WheelFilename {
distribution: PackageName(
"foo",
),
version: Version {
epoch: 0,
release: [
1,
2,
3,
],
pre: None,
post: None,
dev: None,
local: None,
},
python_tag: [
"foo",
],
abi_tag: [
"bar",
],
platform_tag: [
"baz",
],
},
)

View file

@ -31,14 +31,17 @@ impl FromStr for WheelFilename {
// The wheel filename should contain either five or six entries. If six, then the third
// entry is the build tag. If five, then the third entry is the Python tag.
// https://www.python.org/dev/peps/pep-0427/#file-name-convention
//
// 2023-11-08(burntsushi): It looks like the code below actually drops
// the build tag if one is found. According to PEP 0427, the build tag
// is used to break ties. This might mean that we generate identical
// `WheelFilename` values for multiple distinct wheels, but it's not clear
// if this is a problem in practice.
let mut parts = basename.split('-');
let Some(distribution) = parts.next() else {
return Err(WheelFilenameError::InvalidWheelFileName(
filename.to_string(),
"Must have a distribution name".to_string(),
));
};
let distribution = parts
.next()
.expect("split always yields 1 or more elements");
let Some(version) = parts.next() else {
return Err(WheelFilenameError::InvalidWheelFileName(
@ -70,6 +73,12 @@ impl FromStr for WheelFilename {
let (distribution, version, python_tag, abi_tag, platform_tag) =
if let Some(platform_tag) = parts.next() {
if parts.next().is_some() {
return Err(WheelFilenameError::InvalidWheelFileName(
filename.to_string(),
"Must have 5 or 6 components, but has more".to_string(),
));
}
(
distribution,
version,
@ -116,15 +125,7 @@ impl Display for WheelFilename {
impl WheelFilename {
/// Returns `true` if the wheel is compatible with the given tags.
pub fn is_compatible(&self, compatible_tags: &Tags) -> bool {
for tag in compatible_tags.iter() {
if self.python_tag.contains(&tag.0)
&& self.abi_tag.contains(&tag.1)
&& self.platform_tag.contains(&tag.2)
{
return true;
}
}
false
compatible_tags.is_compatible(&self.python_tag, &self.abi_tag, &self.platform_tag)
}
/// Get the tag for this wheel.
@ -170,3 +171,82 @@ pub enum WheelFilenameError {
#[error("The wheel filename \"{0}\" has an invalid package name")]
InvalidPackageName(String, InvalidNameError),
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `filename`, which is expected to be rejected, and hands back
    /// the resulting error. Panics if parsing unexpectedly succeeds.
    fn rejected(filename: &str) -> WheelFilenameError {
        WheelFilename::from_str(filename).unwrap_err()
    }

    // --- Rejected filenames: the rendered error message is pinned inline. ---

    #[test]
    fn err_not_whl_extension() {
        let failure = rejected("foo.rs");
        insta::assert_display_snapshot!(failure, @r###"The wheel filename "foo.rs" is invalid: Must end with .whl"###);
    }

    #[test]
    fn err_1_part_empty() {
        let failure = rejected(".whl");
        insta::assert_display_snapshot!(failure, @r###"The wheel filename ".whl" is invalid: Must have a version"###);
    }

    #[test]
    fn err_1_part_no_version() {
        let failure = rejected("foo.whl");
        insta::assert_display_snapshot!(failure, @r###"The wheel filename "foo.whl" is invalid: Must have a version"###);
    }

    #[test]
    fn err_2_part_no_pythontag() {
        let failure = rejected("foo-version.whl");
        insta::assert_display_snapshot!(failure, @r###"The wheel filename "foo-version.whl" is invalid: Must have a Python tag"###);
    }

    #[test]
    fn err_3_part_no_abitag() {
        let failure = rejected("foo-version-python.whl");
        insta::assert_display_snapshot!(failure, @r###"The wheel filename "foo-version-python.whl" is invalid: Must have an ABI tag"###);
    }

    #[test]
    fn err_4_part_no_platformtag() {
        let failure = rejected("foo-version-python-abi.whl");
        insta::assert_display_snapshot!(failure, @r###"The wheel filename "foo-version-python-abi.whl" is invalid: Must have a platform tag"###);
    }

    #[test]
    fn err_too_many_parts() {
        let failure = rejected("foo-1.2.3-build-python-abi-platform-oops.whl");
        insta::assert_display_snapshot!(failure, @r###"The wheel filename "foo-1.2.3-build-python-abi-platform-oops.whl" is invalid: Must have 5 or 6 components, but has more"###);
    }

    #[test]
    fn err_invalid_package_name() {
        let failure = rejected("f!oo-1.2.3-python-abi-platform.whl");
        insta::assert_display_snapshot!(failure, @r###"The wheel filename "f!oo-1.2.3-python-abi-platform.whl" has an invalid package name"###);
    }

    #[test]
    fn err_invalid_version() {
        let failure = rejected("foo-x.y.z-python-abi-platform.whl");
        insta::assert_display_snapshot!(failure, @r###"The wheel filename "foo-x.y.z-python-abi-platform.whl" has an invalid version part: Version `x.y.z` doesn't match PEP 440 rules"###);
    }

    // --- Accepted filenames: the parsed value is compared against the
    // stored snapshot files. The snapshotted expressions are kept verbatim
    // so they continue to match the `expression` recorded in each snapshot.

    #[test]
    fn ok_single_tags() {
        insta::assert_debug_snapshot!(WheelFilename::from_str("foo-1.2.3-foo-bar-baz.whl"));
    }

    #[test]
    fn ok_multiple_tags() {
        insta::assert_debug_snapshot!(WheelFilename::from_str(
            "foo-1.2.3-ab.cd.ef-gh-ij.kl.mn.op.qr.st.whl"
        ));
    }

    #[test]
    fn ok_build_tag() {
        insta::assert_debug_snapshot!(WheelFilename::from_str(
            "foo-1.2.3-build-python-abi-platform.whl"
        ));
    }
}

View file

@ -10,4 +10,5 @@ authors = { workspace = true }
license = { workspace = true }
[dependencies]
fxhash = { workspace = true }
platform-host = { path = "../platform-host" }

View file

@ -1,14 +1,29 @@
use fxhash::{FxHashMap, FxHashSet};
use platform_host::{Arch, Os, Platform, PlatformError};
/// A set of compatible tags for a given Python version and platform, in
/// (`python_tag`, `abi_tag`, `platform_tag`) format.
/// A set of compatible tags for a given Python version and platform.
///
/// Its principal function is to determine whether the tags for a particular
/// wheel are compatible with the current environment.
#[derive(Debug)]
pub struct Tags(Vec<(String, String, String)>);
pub struct Tags {
/// python_tag |--> abi_tag |--> {platform_tag}
map: FxHashMap<String, FxHashMap<String, FxHashSet<String>>>,
}
impl Tags {
/// Create a new set of tags.
pub fn new(tags: Vec<(String, String, String)>) -> Self {
Self(tags)
let mut map = FxHashMap::default();
for (py, abi, platform) in tags {
map.entry(py.to_string())
.or_insert(FxHashMap::default())
.entry(abi.to_string())
.or_insert(FxHashSet::default())
.insert(platform.to_string());
}
Self { map }
}
/// Returns the compatible tags for the given Python version and platform.
@ -79,11 +94,41 @@ impl Tags {
"any".to_string(),
));
tags.sort();
Ok(Self(tags))
Ok(Self::new(tags))
}
pub fn iter(&self) -> impl Iterator<Item = &(String, String, String)> {
self.0.iter()
/// Reports whether at least one tag supported by this platform can be formed
/// by picking its Python tag from `wheel_python_tags`, its ABI tag from
/// `wheel_abi_tags` and its platform tag from `wheel_platform_tags`.
///
/// Returns `false` if any of the slices is empty.
pub fn is_compatible(
    &self,
    wheel_python_tags: &[String],
    wheel_abi_tags: &[String],
    wheel_platform_tags: &[String],
) -> bool {
    // NOTE: In a typical workload the set of platform tags is large while
    // the tags on a wheel are few (often exactly one per slice). Iterating
    // over the wheel's tags is therefore cheap; what matters is that the
    // platform's tags are only ever reached through hash lookups and never
    // scanned in full.
    wheel_python_tags
        .iter()
        .filter_map(|python| self.map.get(python))
        .any(|abis| {
            wheel_abi_tags
                .iter()
                .filter_map(|abi| abis.get(abi))
                .any(|platforms| {
                    wheel_platform_tags
                        .iter()
                        .any(|platform| platforms.contains(platform))
                })
        })
}
}