make some things guaranteed to be deterministic (#1065)

This PR replaces a few uses of hash maps/sets with btree maps/sets and
index maps/sets. This has the benefit of guaranteeing a deterministic
order of iteration.

I made these changes as part of looking into a flaky test.
Unfortunately, I'm not optimistic that anything here will actually fix
the flaky test, since I don't believe anything was actually dependent
on the order of iteration.
This commit is contained in:
Andrew Gallant 2024-01-23 20:30:33 -05:00 committed by GitHub
parent 1b3a3f4e80
commit eebc2f340a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 53 additions and 17 deletions

View file

@ -7,11 +7,12 @@ use url::Url;
use pep440_rs::VersionSpecifiers;
use pypi_types::{BaseUrl, DistInfoMetadata, File, Hashes, Yanked};
/// A parsed structure from PyPI "HTML" index format for a single package.
#[derive(Debug, Clone)]
pub(crate) struct SimpleHtml {
/// The [`BaseUrl`] to which all relative URLs should be resolved.
pub(crate) base: BaseUrl,
/// The list of [`File`]s available for download.
/// The list of [`File`]s available for download sorted by filename.
pub(crate) files: Vec<File>,
}
@ -37,13 +38,22 @@ impl SimpleHtml {
);
// Parse each `<a>` tag, to extract the filename, hash, and URL.
let files: Vec<File> = dom
let mut files: Vec<File> = dom
.nodes()
.iter()
.filter_map(|node| node.as_tag())
.filter(|link| link.name().as_bytes() == b"a")
.map(|link| Self::parse_anchor(link))
.collect::<Result<Vec<_>, _>>()?;
// While it has not been positively observed, we sort the files
// to ensure we have a defined ordering. Otherwise, if we rely on
// the API to provide a stable ordering and doesn't, it can lead
// non-deterministic behavior elsewhere. (This is somewhat hand-wavy
// and a bit of a band-aide, since arguably, the order of this API
// response probably shouldn't have an impact on things downstream from
// this. That is, if something depends on ordering, then it should
// probably be the thing that does the sorting.)
files.sort_unstable_by(|f1, f2| f1.filename.cmp(&f2.filename));
Ok(Self { base, files })
}

View file

@ -43,6 +43,7 @@ derivative = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] }
futures = { workspace = true }
http-cache-semantics = { workspace = true }
indexmap = { workspace = true }
itertools = { workspace = true }
once_cell = { workspace = true }
owo-colors = { workspace = true }

View file

@ -1,9 +1,10 @@
use std::collections::BTreeSet;
use std::convert::Infallible;
use std::fmt::Formatter;
use indexmap::IndexMap;
use pubgrub::range::Range;
use pubgrub::report::{DefaultStringReporter, DerivationTree, Reporter};
use rustc_hash::FxHashMap;
use thiserror::Error;
use url::Url;
@ -112,7 +113,7 @@ impl From<pubgrub::error::PubGrubError<PubGrubPackage, Range<Version>, Infallibl
ResolveError::NoSolution(NoSolutionError {
derivation_tree,
// The following should be populated before display for the best error messages
available_versions: FxHashMap::default(),
available_versions: IndexMap::default(),
selector: None,
python_requirement: None,
})
@ -131,7 +132,7 @@ impl From<pubgrub::error::PubGrubError<PubGrubPackage, Range<Version>, Infallibl
#[derive(Debug)]
pub struct NoSolutionError {
derivation_tree: DerivationTree<PubGrubPackage, Range<Version>>,
available_versions: FxHashMap<PubGrubPackage, Vec<Version>>,
available_versions: IndexMap<PubGrubPackage, BTreeSet<Version>>,
selector: Option<CandidateSelector>,
python_requirement: Option<PythonRequirement>,
}
@ -170,19 +171,21 @@ impl NoSolutionError {
python_requirement: &PythonRequirement,
package_versions: &OnceMap<PackageName, VersionMap>,
) -> Self {
let mut available_versions = FxHashMap::default();
let mut available_versions = IndexMap::default();
for package in self.derivation_tree.packages() {
match package {
PubGrubPackage::Root(_) => {}
PubGrubPackage::Python(PubGrubPython::Installed) => {
available_versions.insert(
package.clone(),
vec![python_requirement.installed().clone()],
BTreeSet::from([python_requirement.installed().clone()]),
);
}
PubGrubPackage::Python(PubGrubPython::Target) => {
available_versions
.insert(package.clone(), vec![python_requirement.target().clone()]);
available_versions.insert(
package.clone(),
BTreeSet::from([python_requirement.target().clone()]),
);
}
PubGrubPackage::Package(name, ..) => {
if let Some(entry) = package_versions.get(name) {

View file

@ -1,15 +1,16 @@
use std::borrow::Cow;
use std::cmp::Ordering;
use std::collections::BTreeSet;
use std::ops::Bound;
use derivative::Derivative;
use indexmap::{IndexMap, IndexSet};
use owo_colors::OwoColorize;
use pep440_rs::Version;
use pubgrub::range::Range;
use pubgrub::report::{DerivationTree, Derived, External, ReportFormatter};
use pubgrub::term::Term;
use pubgrub::type_aliases::Map;
use rustc_hash::{FxHashMap, FxHashSet};
use crate::candidate_selector::CandidateSelector;
use crate::prerelease_mode::PreReleaseStrategy;
@ -20,7 +21,7 @@ use super::PubGrubPackage;
#[derive(Debug)]
pub(crate) struct PubGrubReportFormatter<'a> {
/// The versions that were available for each package
pub(crate) available_versions: &'a FxHashMap<PubGrubPackage, Vec<Version>>,
pub(crate) available_versions: &'a IndexMap<PubGrubPackage, BTreeSet<Version>>,
/// The versions that were available for each package
pub(crate) python_requirement: Option<&'a PythonRequirement>,
@ -151,8 +152,8 @@ impl ReportFormatter<PubGrubPackage, Range<Version>> for PubGrubReportFormatter<
[(package @ PubGrubPackage::Package(..), Term::Positive(range))] => {
let range = range.simplify(
self.available_versions
.get(package)
.unwrap_or(&vec![])
.get(*package)
.unwrap_or(&BTreeSet::new())
.iter(),
);
format!(
@ -163,8 +164,8 @@ impl ReportFormatter<PubGrubPackage, Range<Version>> for PubGrubReportFormatter<
[(package @ PubGrubPackage::Package(..), Term::Negative(range))] => {
let range = range.simplify(
self.available_versions
.get(package)
.unwrap_or(&vec![])
.get(*package)
.unwrap_or(&BTreeSet::new())
.iter(),
);
format!(
@ -347,7 +348,7 @@ impl PubGrubReportFormatter<'_> {
&self,
derivation_tree: &DerivationTree<PubGrubPackage, Range<Version>>,
selector: &CandidateSelector,
) -> FxHashSet<PubGrubHint> {
) -> IndexSet<PubGrubHint> {
/// Returns `true` if pre-releases were allowed for a package.
fn allowed_prerelease(package: &PubGrubPackage, selector: &CandidateSelector) -> bool {
match selector.prerelease_strategy() {
@ -371,7 +372,7 @@ impl PubGrubReportFormatter<'_> {
}
}
let mut hints = FxHashSet::default();
let mut hints = IndexSet::default();
match derivation_tree {
DerivationTree::External(external) => match external {
External::NoVersions(package, set) => {

View file

@ -7,11 +7,30 @@ use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError};
use crate::lenient_requirement::LenientVersionSpecifiers;
/// A collection of "files" from `PyPI`'s JSON API for a single package.
#[derive(Debug, Clone, Deserialize)]
pub struct SimpleJson {
/// The list of [`File`]s available for download sorted by filename.
#[serde(deserialize_with = "sorted_simple_json_files")]
pub files: Vec<File>,
}
/// Deserializes a sequence of "simple" files from `PyPI` and ensures that they
/// are sorted in a stable order.
fn sorted_simple_json_files<'de, D: Deserializer<'de>>(d: D) -> Result<Vec<File>, D::Error> {
let mut files = <Vec<File>>::deserialize(d)?;
// While it has not been positively observed, we sort the files
// to ensure we have a defined ordering. Otherwise, if we rely on
// the API to provide a stable ordering and doesn't, it can lead
// non-deterministic behavior elsewhere. (This is somewhat hand-wavy
// and a bit of a band-aide, since arguably, the order of this API
// response probably shouldn't have an impact on things downstream from
// this. That is, if something depends on ordering, then it should
// probably be the thing that does the sorting.)
files.sort_unstable_by(|f1, f2| f1.filename.cmp(&f2.filename));
Ok(files)
}
/// A single (remote) file belonging to a package, either a wheel or a source distribution.
///
/// <https://peps.python.org/pep-0691/#project-detail>