make some things guaranteed to be deterministic (#1065)

This PR replaces a few uses of hash maps/sets with btree maps/sets and
index maps/sets. This has the benefit of guaranteeing a deterministic
order of iteration.

I made these changes as part of looking into a flaky test.
Unfortunately, I'm not optimistic that anything here will actually fix
the flaky test, since I don't believe anything was actually dependent
on the order of iteration.
This commit is contained in:
Andrew Gallant 2024-01-23 20:30:33 -05:00 committed by GitHub
parent 1b3a3f4e80
commit eebc2f340a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 53 additions and 17 deletions

1
Cargo.lock generated
View file

@ -2653,6 +2653,7 @@ dependencies = [
"futures", "futures",
"gourgeist", "gourgeist",
"http-cache-semantics", "http-cache-semantics",
"indexmap 2.1.0",
"insta", "insta",
"install-wheel-rs", "install-wheel-rs",
"itertools 0.12.0", "itertools 0.12.0",

View file

@ -42,6 +42,7 @@ hex = { version = "0.4.3" }
html-escape = { version = "0.2.13" } html-escape = { version = "0.2.13" }
http = { version = "0.2.11" } http = { version = "0.2.11" }
http-cache-semantics = { version = "1.0.2" } http-cache-semantics = { version = "1.0.2" }
indexmap = { version = "2.1.0" }
indicatif = { version = "0.17.7" } indicatif = { version = "0.17.7" }
indoc = { version = "2.0.4" } indoc = { version = "2.0.4" }
itertools = { version = "0.12.0" } itertools = { version = "0.12.0" }

View file

@ -7,11 +7,12 @@ use url::Url;
use pep440_rs::VersionSpecifiers; use pep440_rs::VersionSpecifiers;
use pypi_types::{BaseUrl, DistInfoMetadata, File, Hashes, Yanked}; use pypi_types::{BaseUrl, DistInfoMetadata, File, Hashes, Yanked};
/// A parsed structure from PyPI "HTML" index format for a single package.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub(crate) struct SimpleHtml { pub(crate) struct SimpleHtml {
/// The [`BaseUrl`] to which all relative URLs should be resolved. /// The [`BaseUrl`] to which all relative URLs should be resolved.
pub(crate) base: BaseUrl, pub(crate) base: BaseUrl,
/// The list of [`File`]s available for download. /// The list of [`File`]s available for download sorted by filename.
pub(crate) files: Vec<File>, pub(crate) files: Vec<File>,
} }
@ -37,13 +38,22 @@ impl SimpleHtml {
); );
// Parse each `<a>` tag, to extract the filename, hash, and URL. // Parse each `<a>` tag, to extract the filename, hash, and URL.
let files: Vec<File> = dom let mut files: Vec<File> = dom
.nodes() .nodes()
.iter() .iter()
.filter_map(|node| node.as_tag()) .filter_map(|node| node.as_tag())
.filter(|link| link.name().as_bytes() == b"a") .filter(|link| link.name().as_bytes() == b"a")
.map(|link| Self::parse_anchor(link)) .map(|link| Self::parse_anchor(link))
.collect::<Result<Vec<_>, _>>()?; .collect::<Result<Vec<_>, _>>()?;
// While it has not been positively observed, we sort the files
// to ensure we have a defined ordering. Otherwise, if we rely on
// the API to provide a stable ordering and doesn't, it can lead
// non-deterministic behavior elsewhere. (This is somewhat hand-wavy
// and a bit of a band-aide, since arguably, the order of this API
// response probably shouldn't have an impact on things downstream from
// this. That is, if something depends on ordering, then it should
// probably be the thing that does the sorting.)
files.sort_unstable_by(|f1, f2| f1.filename.cmp(&f2.filename));
Ok(Self { base, files }) Ok(Self { base, files })
} }

View file

@ -43,6 +43,7 @@ derivative = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] } fs-err = { workspace = true, features = ["tokio"] }
futures = { workspace = true } futures = { workspace = true }
http-cache-semantics = { workspace = true } http-cache-semantics = { workspace = true }
indexmap = { workspace = true }
itertools = { workspace = true } itertools = { workspace = true }
once_cell = { workspace = true } once_cell = { workspace = true }
owo-colors = { workspace = true } owo-colors = { workspace = true }

View file

@ -1,9 +1,10 @@
use std::collections::BTreeSet;
use std::convert::Infallible; use std::convert::Infallible;
use std::fmt::Formatter; use std::fmt::Formatter;
use indexmap::IndexMap;
use pubgrub::range::Range; use pubgrub::range::Range;
use pubgrub::report::{DefaultStringReporter, DerivationTree, Reporter}; use pubgrub::report::{DefaultStringReporter, DerivationTree, Reporter};
use rustc_hash::FxHashMap;
use thiserror::Error; use thiserror::Error;
use url::Url; use url::Url;
@ -112,7 +113,7 @@ impl From<pubgrub::error::PubGrubError<PubGrubPackage, Range<Version>, Infallibl
ResolveError::NoSolution(NoSolutionError { ResolveError::NoSolution(NoSolutionError {
derivation_tree, derivation_tree,
// The following should be populated before display for the best error messages // The following should be populated before display for the best error messages
available_versions: FxHashMap::default(), available_versions: IndexMap::default(),
selector: None, selector: None,
python_requirement: None, python_requirement: None,
}) })
@ -131,7 +132,7 @@ impl From<pubgrub::error::PubGrubError<PubGrubPackage, Range<Version>, Infallibl
#[derive(Debug)] #[derive(Debug)]
pub struct NoSolutionError { pub struct NoSolutionError {
derivation_tree: DerivationTree<PubGrubPackage, Range<Version>>, derivation_tree: DerivationTree<PubGrubPackage, Range<Version>>,
available_versions: FxHashMap<PubGrubPackage, Vec<Version>>, available_versions: IndexMap<PubGrubPackage, BTreeSet<Version>>,
selector: Option<CandidateSelector>, selector: Option<CandidateSelector>,
python_requirement: Option<PythonRequirement>, python_requirement: Option<PythonRequirement>,
} }
@ -170,19 +171,21 @@ impl NoSolutionError {
python_requirement: &PythonRequirement, python_requirement: &PythonRequirement,
package_versions: &OnceMap<PackageName, VersionMap>, package_versions: &OnceMap<PackageName, VersionMap>,
) -> Self { ) -> Self {
let mut available_versions = FxHashMap::default(); let mut available_versions = IndexMap::default();
for package in self.derivation_tree.packages() { for package in self.derivation_tree.packages() {
match package { match package {
PubGrubPackage::Root(_) => {} PubGrubPackage::Root(_) => {}
PubGrubPackage::Python(PubGrubPython::Installed) => { PubGrubPackage::Python(PubGrubPython::Installed) => {
available_versions.insert( available_versions.insert(
package.clone(), package.clone(),
vec![python_requirement.installed().clone()], BTreeSet::from([python_requirement.installed().clone()]),
); );
} }
PubGrubPackage::Python(PubGrubPython::Target) => { PubGrubPackage::Python(PubGrubPython::Target) => {
available_versions available_versions.insert(
.insert(package.clone(), vec![python_requirement.target().clone()]); package.clone(),
BTreeSet::from([python_requirement.target().clone()]),
);
} }
PubGrubPackage::Package(name, ..) => { PubGrubPackage::Package(name, ..) => {
if let Some(entry) = package_versions.get(name) { if let Some(entry) = package_versions.get(name) {

View file

@ -1,15 +1,16 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::cmp::Ordering; use std::cmp::Ordering;
use std::collections::BTreeSet;
use std::ops::Bound; use std::ops::Bound;
use derivative::Derivative; use derivative::Derivative;
use indexmap::{IndexMap, IndexSet};
use owo_colors::OwoColorize; use owo_colors::OwoColorize;
use pep440_rs::Version; use pep440_rs::Version;
use pubgrub::range::Range; use pubgrub::range::Range;
use pubgrub::report::{DerivationTree, Derived, External, ReportFormatter}; use pubgrub::report::{DerivationTree, Derived, External, ReportFormatter};
use pubgrub::term::Term; use pubgrub::term::Term;
use pubgrub::type_aliases::Map; use pubgrub::type_aliases::Map;
use rustc_hash::{FxHashMap, FxHashSet};
use crate::candidate_selector::CandidateSelector; use crate::candidate_selector::CandidateSelector;
use crate::prerelease_mode::PreReleaseStrategy; use crate::prerelease_mode::PreReleaseStrategy;
@ -20,7 +21,7 @@ use super::PubGrubPackage;
#[derive(Debug)] #[derive(Debug)]
pub(crate) struct PubGrubReportFormatter<'a> { pub(crate) struct PubGrubReportFormatter<'a> {
/// The versions that were available for each package /// The versions that were available for each package
pub(crate) available_versions: &'a FxHashMap<PubGrubPackage, Vec<Version>>, pub(crate) available_versions: &'a IndexMap<PubGrubPackage, BTreeSet<Version>>,
/// The versions that were available for each package /// The versions that were available for each package
pub(crate) python_requirement: Option<&'a PythonRequirement>, pub(crate) python_requirement: Option<&'a PythonRequirement>,
@ -151,8 +152,8 @@ impl ReportFormatter<PubGrubPackage, Range<Version>> for PubGrubReportFormatter<
[(package @ PubGrubPackage::Package(..), Term::Positive(range))] => { [(package @ PubGrubPackage::Package(..), Term::Positive(range))] => {
let range = range.simplify( let range = range.simplify(
self.available_versions self.available_versions
.get(package) .get(*package)
.unwrap_or(&vec![]) .unwrap_or(&BTreeSet::new())
.iter(), .iter(),
); );
format!( format!(
@ -163,8 +164,8 @@ impl ReportFormatter<PubGrubPackage, Range<Version>> for PubGrubReportFormatter<
[(package @ PubGrubPackage::Package(..), Term::Negative(range))] => { [(package @ PubGrubPackage::Package(..), Term::Negative(range))] => {
let range = range.simplify( let range = range.simplify(
self.available_versions self.available_versions
.get(package) .get(*package)
.unwrap_or(&vec![]) .unwrap_or(&BTreeSet::new())
.iter(), .iter(),
); );
format!( format!(
@ -347,7 +348,7 @@ impl PubGrubReportFormatter<'_> {
&self, &self,
derivation_tree: &DerivationTree<PubGrubPackage, Range<Version>>, derivation_tree: &DerivationTree<PubGrubPackage, Range<Version>>,
selector: &CandidateSelector, selector: &CandidateSelector,
) -> FxHashSet<PubGrubHint> { ) -> IndexSet<PubGrubHint> {
/// Returns `true` if pre-releases were allowed for a package. /// Returns `true` if pre-releases were allowed for a package.
fn allowed_prerelease(package: &PubGrubPackage, selector: &CandidateSelector) -> bool { fn allowed_prerelease(package: &PubGrubPackage, selector: &CandidateSelector) -> bool {
match selector.prerelease_strategy() { match selector.prerelease_strategy() {
@ -371,7 +372,7 @@ impl PubGrubReportFormatter<'_> {
} }
} }
let mut hints = FxHashSet::default(); let mut hints = IndexSet::default();
match derivation_tree { match derivation_tree {
DerivationTree::External(external) => match external { DerivationTree::External(external) => match external {
External::NoVersions(package, set) => { External::NoVersions(package, set) => {

View file

@ -7,11 +7,30 @@ use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError};
use crate::lenient_requirement::LenientVersionSpecifiers; use crate::lenient_requirement::LenientVersionSpecifiers;
/// A collection of "files" from `PyPI`'s JSON API for a single package.
#[derive(Debug, Clone, Deserialize)] #[derive(Debug, Clone, Deserialize)]
pub struct SimpleJson { pub struct SimpleJson {
/// The list of [`File`]s available for download sorted by filename.
#[serde(deserialize_with = "sorted_simple_json_files")]
pub files: Vec<File>, pub files: Vec<File>,
} }
/// Deserializes a sequence of "simple" files from `PyPI` and ensures that they
/// are sorted in a stable order.
fn sorted_simple_json_files<'de, D: Deserializer<'de>>(d: D) -> Result<Vec<File>, D::Error> {
let mut files = <Vec<File>>::deserialize(d)?;
// While it has not been positively observed, we sort the files
// to ensure we have a defined ordering. Otherwise, if we rely on
// the API to provide a stable ordering and doesn't, it can lead
// non-deterministic behavior elsewhere. (This is somewhat hand-wavy
// and a bit of a band-aide, since arguably, the order of this API
// response probably shouldn't have an impact on things downstream from
// this. That is, if something depends on ordering, then it should
// probably be the thing that does the sorting.)
files.sort_unstable_by(|f1, f2| f1.filename.cmp(&f2.filename));
Ok(files)
}
/// A single (remote) file belonging to a package, either a wheel or a source distribution. /// A single (remote) file belonging to a package, either a wheel or a source distribution.
/// ///
/// <https://peps.python.org/pep-0691/#project-detail> /// <https://peps.python.org/pep-0691/#project-detail>