mirror of
https://github.com/astral-sh/uv.git
synced 2025-08-04 10:58:28 +00:00
make some things guaranteed to be deterministic (#1065)
This PR replaces a few uses of hash maps/sets with btree maps/sets and index maps/sets. This has the benefit of guaranteeing a deterministic order of iteration. I made these changes as part of looking into a flaky test. Unfortunately, I'm not optimistic that anything here will actually fix the flaky test, since I don't believe anything was actually dependent on the order of iteration.
This commit is contained in:
parent
1b3a3f4e80
commit
eebc2f340a
7 changed files with 53 additions and 17 deletions
|
@ -7,11 +7,12 @@ use url::Url;
|
|||
use pep440_rs::VersionSpecifiers;
|
||||
use pypi_types::{BaseUrl, DistInfoMetadata, File, Hashes, Yanked};
|
||||
|
||||
/// A parsed structure from PyPI "HTML" index format for a single package.
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct SimpleHtml {
|
||||
/// The [`BaseUrl`] to which all relative URLs should be resolved.
|
||||
pub(crate) base: BaseUrl,
|
||||
/// The list of [`File`]s available for download.
|
||||
/// The list of [`File`]s available for download sorted by filename.
|
||||
pub(crate) files: Vec<File>,
|
||||
}
|
||||
|
||||
|
@ -37,13 +38,22 @@ impl SimpleHtml {
|
|||
);
|
||||
|
||||
// Parse each `<a>` tag, to extract the filename, hash, and URL.
|
||||
let files: Vec<File> = dom
|
||||
let mut files: Vec<File> = dom
|
||||
.nodes()
|
||||
.iter()
|
||||
.filter_map(|node| node.as_tag())
|
||||
.filter(|link| link.name().as_bytes() == b"a")
|
||||
.map(|link| Self::parse_anchor(link))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
// While it has not been positively observed, we sort the files
|
||||
// to ensure we have a defined ordering. Otherwise, if we rely on
|
||||
// the API to provide a stable ordering and it doesn't, it can lead to
|
||||
// non-deterministic behavior elsewhere. (This is somewhat hand-wavy
|
||||
// and a bit of a band-aid, since arguably, the order of this API
|
||||
// response probably shouldn't have an impact on things downstream from
|
||||
// this. That is, if something depends on ordering, then it should
|
||||
// probably be the thing that does the sorting.)
|
||||
files.sort_unstable_by(|f1, f2| f1.filename.cmp(&f2.filename));
|
||||
|
||||
Ok(Self { base, files })
|
||||
}
|
||||
|
|
|
@ -43,6 +43,7 @@ derivative = { workspace = true }
|
|||
fs-err = { workspace = true, features = ["tokio"] }
|
||||
futures = { workspace = true }
|
||||
http-cache-semantics = { workspace = true }
|
||||
indexmap = { workspace = true }
|
||||
itertools = { workspace = true }
|
||||
once_cell = { workspace = true }
|
||||
owo-colors = { workspace = true }
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
use std::collections::BTreeSet;
|
||||
use std::convert::Infallible;
|
||||
use std::fmt::Formatter;
|
||||
|
||||
use indexmap::IndexMap;
|
||||
use pubgrub::range::Range;
|
||||
use pubgrub::report::{DefaultStringReporter, DerivationTree, Reporter};
|
||||
use rustc_hash::FxHashMap;
|
||||
use thiserror::Error;
|
||||
use url::Url;
|
||||
|
||||
|
@ -112,7 +113,7 @@ impl From<pubgrub::error::PubGrubError<PubGrubPackage, Range<Version>, Infallibl
|
|||
ResolveError::NoSolution(NoSolutionError {
|
||||
derivation_tree,
|
||||
// The following should be populated before display for the best error messages
|
||||
available_versions: FxHashMap::default(),
|
||||
available_versions: IndexMap::default(),
|
||||
selector: None,
|
||||
python_requirement: None,
|
||||
})
|
||||
|
@ -131,7 +132,7 @@ impl From<pubgrub::error::PubGrubError<PubGrubPackage, Range<Version>, Infallibl
|
|||
#[derive(Debug)]
|
||||
pub struct NoSolutionError {
|
||||
derivation_tree: DerivationTree<PubGrubPackage, Range<Version>>,
|
||||
available_versions: FxHashMap<PubGrubPackage, Vec<Version>>,
|
||||
available_versions: IndexMap<PubGrubPackage, BTreeSet<Version>>,
|
||||
selector: Option<CandidateSelector>,
|
||||
python_requirement: Option<PythonRequirement>,
|
||||
}
|
||||
|
@ -170,19 +171,21 @@ impl NoSolutionError {
|
|||
python_requirement: &PythonRequirement,
|
||||
package_versions: &OnceMap<PackageName, VersionMap>,
|
||||
) -> Self {
|
||||
let mut available_versions = FxHashMap::default();
|
||||
let mut available_versions = IndexMap::default();
|
||||
for package in self.derivation_tree.packages() {
|
||||
match package {
|
||||
PubGrubPackage::Root(_) => {}
|
||||
PubGrubPackage::Python(PubGrubPython::Installed) => {
|
||||
available_versions.insert(
|
||||
package.clone(),
|
||||
vec![python_requirement.installed().clone()],
|
||||
BTreeSet::from([python_requirement.installed().clone()]),
|
||||
);
|
||||
}
|
||||
PubGrubPackage::Python(PubGrubPython::Target) => {
|
||||
available_versions
|
||||
.insert(package.clone(), vec![python_requirement.target().clone()]);
|
||||
available_versions.insert(
|
||||
package.clone(),
|
||||
BTreeSet::from([python_requirement.target().clone()]),
|
||||
);
|
||||
}
|
||||
PubGrubPackage::Package(name, ..) => {
|
||||
if let Some(entry) = package_versions.get(name) {
|
||||
|
|
|
@ -1,15 +1,16 @@
|
|||
use std::borrow::Cow;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::BTreeSet;
|
||||
use std::ops::Bound;
|
||||
|
||||
use derivative::Derivative;
|
||||
use indexmap::{IndexMap, IndexSet};
|
||||
use owo_colors::OwoColorize;
|
||||
use pep440_rs::Version;
|
||||
use pubgrub::range::Range;
|
||||
use pubgrub::report::{DerivationTree, Derived, External, ReportFormatter};
|
||||
use pubgrub::term::Term;
|
||||
use pubgrub::type_aliases::Map;
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
|
||||
use crate::candidate_selector::CandidateSelector;
|
||||
use crate::prerelease_mode::PreReleaseStrategy;
|
||||
|
@ -20,7 +21,7 @@ use super::PubGrubPackage;
|
|||
#[derive(Debug)]
|
||||
pub(crate) struct PubGrubReportFormatter<'a> {
|
||||
/// The versions that were available for each package
|
||||
pub(crate) available_versions: &'a FxHashMap<PubGrubPackage, Vec<Version>>,
|
||||
pub(crate) available_versions: &'a IndexMap<PubGrubPackage, BTreeSet<Version>>,
|
||||
|
||||
/// The Python requirement associated with this report, if any.
|
||||
pub(crate) python_requirement: Option<&'a PythonRequirement>,
|
||||
|
@ -151,8 +152,8 @@ impl ReportFormatter<PubGrubPackage, Range<Version>> for PubGrubReportFormatter<
|
|||
[(package @ PubGrubPackage::Package(..), Term::Positive(range))] => {
|
||||
let range = range.simplify(
|
||||
self.available_versions
|
||||
.get(package)
|
||||
.unwrap_or(&vec![])
|
||||
.get(*package)
|
||||
.unwrap_or(&BTreeSet::new())
|
||||
.iter(),
|
||||
);
|
||||
format!(
|
||||
|
@ -163,8 +164,8 @@ impl ReportFormatter<PubGrubPackage, Range<Version>> for PubGrubReportFormatter<
|
|||
[(package @ PubGrubPackage::Package(..), Term::Negative(range))] => {
|
||||
let range = range.simplify(
|
||||
self.available_versions
|
||||
.get(package)
|
||||
.unwrap_or(&vec![])
|
||||
.get(*package)
|
||||
.unwrap_or(&BTreeSet::new())
|
||||
.iter(),
|
||||
);
|
||||
format!(
|
||||
|
@ -347,7 +348,7 @@ impl PubGrubReportFormatter<'_> {
|
|||
&self,
|
||||
derivation_tree: &DerivationTree<PubGrubPackage, Range<Version>>,
|
||||
selector: &CandidateSelector,
|
||||
) -> FxHashSet<PubGrubHint> {
|
||||
) -> IndexSet<PubGrubHint> {
|
||||
/// Returns `true` if pre-releases were allowed for a package.
|
||||
fn allowed_prerelease(package: &PubGrubPackage, selector: &CandidateSelector) -> bool {
|
||||
match selector.prerelease_strategy() {
|
||||
|
@ -371,7 +372,7 @@ impl PubGrubReportFormatter<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
let mut hints = FxHashSet::default();
|
||||
let mut hints = IndexSet::default();
|
||||
match derivation_tree {
|
||||
DerivationTree::External(external) => match external {
|
||||
External::NoVersions(package, set) => {
|
||||
|
|
|
@ -7,11 +7,30 @@ use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError};
|
|||
|
||||
use crate::lenient_requirement::LenientVersionSpecifiers;
|
||||
|
||||
/// A collection of "files" from `PyPI`'s JSON API for a single package.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct SimpleJson {
|
||||
/// The list of [`File`]s available for download, sorted by filename.
///
/// The ordering is applied while deserializing (see the `deserialize_with`
/// attribute below), regardless of the order of the entries in the input.
|
||||
#[serde(deserialize_with = "sorted_simple_json_files")]
|
||||
pub files: Vec<File>,
|
||||
}
|
||||
|
||||
/// Deserializes a sequence of "simple" files from `PyPI` and ensures that they
|
||||
/// are sorted by filename, so that the resulting order is deterministic.
///
/// # Errors
///
/// Returns the deserializer's error if the input cannot be deserialized into
/// a `Vec<File>`.
|
||||
fn sorted_simple_json_files<'de, D: Deserializer<'de>>(d: D) -> Result<Vec<File>, D::Error> {
|
||||
let mut files = <Vec<File>>::deserialize(d)?;
|
||||
// While it has not been positively observed, we sort the files
|
||||
// to ensure we have a defined ordering. Otherwise, if we rely on
|
||||
// the API to provide a stable ordering and it doesn't, it can lead to
|
||||
// non-deterministic behavior elsewhere. (This is somewhat hand-wavy
|
||||
// and a bit of a band-aid, since arguably, the order of this API
|
||||
// response probably shouldn't have an impact on things downstream from
|
||||
// this. That is, if something depends on ordering, then it should
|
||||
// probably be the thing that does the sorting.)
//
// NOTE(review): an unstable sort does not preserve the relative order of
// entries that compare equal, so this is only fully deterministic if
// filenames are unique within a response -- confirm.
|
||||
files.sort_unstable_by(|f1, f2| f1.filename.cmp(&f2.filename));
|
||||
Ok(files)
|
||||
}
|
||||
|
||||
/// A single (remote) file belonging to a package, either a wheel or a source distribution.
|
||||
///
|
||||
/// <https://peps.python.org/pep-0691/#project-detail>
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue