Use FxHash (#151)

This commit is contained in:
Charlie Marsh 2023-10-20 01:26:06 -04:00 committed by GitHub
parent 8001c792e7
commit 4645f79237
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 48 additions and 43 deletions

11
Cargo.lock generated
View file

@ -900,6 +900,15 @@ dependencies = [
"slab",
]
[[package]]
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
dependencies = [
"byteorder",
]
[[package]]
name = "generic-array"
version = "0.14.7"
@ -1315,6 +1324,7 @@ dependencies = [
"data-encoding",
"fs-err",
"fs2",
"fxhash",
"glibc_version",
"goblin",
"indoc 2.0.4",
@ -2155,6 +2165,7 @@ dependencies = [
"bitflags 2.4.1",
"colored",
"futures",
"fxhash",
"insta",
"once_cell",
"pep440_rs 0.3.12",

View file

@ -28,6 +28,7 @@ flate2 = { version = "1.0.28" }
fs-err = { version = "2.9.0" }
fs2 = { version = "0.4.3" }
futures = { version = "0.3.28" }
fxhash = { version = "0.2.1" }
glibc_version = { version = "0.1.2" }
goblin = { version = "0.7.1" }
http-cache-reqwest = { version = "0.11.3" }
@ -38,6 +39,7 @@ mailparse = { version = "0.14.0" }
memchr = { version = "2.6.4" }
miette = { version = "5.10.0" }
once_cell = { version = "1.18.0" }
petgraph = { version = "0.6.4" }
platform-info = { version = "2.0.2" }
plist = { version = "1.5.0" }
pyproject-toml = { version = "0.7.0" }

View file

@ -26,6 +26,7 @@ csv = { workspace = true }
data-encoding = { workspace = true }
fs-err = { workspace = true }
fs2 = { workspace = true }
fxhash = { workspace = true }
glibc_version = { workspace = true }
goblin = { workspace = true }
mailparse = { workspace = true }

View file

@ -31,9 +31,6 @@ mod wheel;
pub enum Error {
#[error(transparent)]
IO(#[from] io::Error),
/// This should never actually occur
#[error("Failed to serialize direct_url.json ಠ_ಠ")]
DirectUrlSerdeJson(#[source] serde_json::Error),
/// Tags/metadata didn't match platform
#[error("The wheel is incompatible with the current platform {os} {arch}")]
IncompatibleWheel { os: Os, arch: Arch },

View file

@ -1,21 +1,9 @@
use std::collections::{HashMap, HashSet};
use fxhash::FxHashSet;
use regex::Regex;
use serde::Serialize;
use crate::Error;
/// Minimal `direct_url.json` schema
///
/// Serialize-only (derives `Serialize`, not `Deserialize`); only the two fields declared below
/// are modelled.
///
/// <https://packaging.python.org/en/latest/specifications/direct-url/>
/// <https://www.python.org/dev/peps/pep-0610/>
#[derive(Serialize)]
struct DirectUrl {
// Both the key and the value type are `()`, which is what the suppressed lint flags.
// NOTE(review): presumably this map is always left empty so the field serializes as `{}` —
// confirm against the construction site.
#[allow(clippy::zero_sized_map_values)]
archive_info: HashMap<(), ()>,
// NOTE(review): presumably the URL the distribution was obtained from, per the linked
// specification — confirm against the caller.
url: String,
}
/// A script defining the name of the runnable entrypoint and the module and function that should be
/// run.
#[cfg(feature = "python_bindings")]
@ -57,12 +45,12 @@ impl Script {
.captures(value)
.ok_or_else(|| Error::InvalidWheel(format!("invalid console script: '{value}'")))?;
if let Some(script_extras) = captures.name("extras") {
let script_extras = script_extras
.as_str()
.split(',')
.map(|extra| extra.trim().to_string())
.collect::<HashSet<String>>();
if let Some(extras) = extras {
let script_extras = script_extras
.as_str()
.split(',')
.map(|extra| extra.trim().to_string())
.collect::<FxHashSet<String>>();
if !script_extras.is_subset(&extras.iter().cloned().collect()) {
return Ok(None);
}

View file

@ -1,4 +1,4 @@
use std::collections::{HashMap, HashSet};
use std::collections::HashMap;
use std::ffi::OsString;
use std::io::{BufRead, BufReader, BufWriter, Cursor, Read, Seek, Write};
use std::path::{Path, PathBuf};
@ -9,6 +9,7 @@ use configparser::ini::Ini;
use data_encoding::BASE64URL_NOPAD;
use fs_err as fs;
use fs_err::{DirEntry, File};
use fxhash::{FxHashMap, FxHashSet};
use mailparse::MailHeaderMap;
use sha2::{Digest, Sha256};
use tempfile::tempdir;
@ -158,7 +159,7 @@ fn unpack_wheel_files<R: Read + Seek>(
// Cache the created parent dirs to avoid repeated IO calls.
// With bytecode compilation and sha2 deactivated, those calls were 5% of total runtime;
// with the cache, they are 2.3%.
let mut created_dirs = HashSet::new();
let mut created_dirs = FxHashSet::default();
// https://github.com/zip-rs/zip/blob/7edf2489d5cff8b80f02ee6fc5febf3efd0a9442/examples/extract.rs
for i in 0..archive.len() {
let mut file = archive
@ -858,8 +859,8 @@ pub fn read_record_file(record: &mut impl Read) -> Result<Vec<RecordEntry>, Erro
pub fn parse_key_value_file(
file: &mut impl Read,
debug_filename: &str,
) -> Result<HashMap<String, Vec<String>>, Error> {
let mut data: HashMap<String, Vec<String>> = HashMap::new();
) -> Result<FxHashMap<String, Vec<String>>, Error> {
let mut data: FxHashMap<String, Vec<String>> = FxHashMap::default();
let file = BufReader::new(file);
for (line_no, line) in file.lines().enumerate() {

View file

@ -23,12 +23,13 @@ anyhow = { workspace = true }
bitflags = { workspace = true }
colored = { workspace = true }
futures = { workspace = true }
fxhash = { workspace = true }
once_cell = { workspace = true }
petgraph = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
waitmap = { workspace = true }
petgraph = "0.6.4"
[dev-dependencies]
once_cell = { version = "1.18.0" }

View file

@ -1,6 +1,7 @@
use std::collections::{BTreeMap, HashMap};
use std::hash::BuildHasherDefault;
use colored::Colorize;
use fxhash::FxHashMap;
use petgraph::visit::EdgeRef;
use pubgrub::range::Range;
use pubgrub::solver::{Kind, State};
@ -49,11 +50,11 @@ impl PinnedPackage {
/// A set of packages pinned at specific versions.
#[derive(Debug, Default)]
pub struct Resolution(BTreeMap<PackageName, PinnedPackage>);
pub struct Resolution(FxHashMap<PackageName, PinnedPackage>);
impl Resolution {
/// Create a new resolution from the given pinned packages.
pub(crate) fn new(packages: BTreeMap<PackageName, PinnedPackage>) -> Self {
pub(crate) fn new(packages: FxHashMap<PackageName, PinnedPackage>) -> Self {
Self(packages)
}
@ -87,7 +88,7 @@ impl Graph {
/// Create a new graph from the resolved `PubGrub` state.
pub fn from_state(
selection: &SelectedDependencies<PubGrubPackage, PubGrubVersion>,
pins: &HashMap<PackageName, HashMap<Version, File>>,
pins: &FxHashMap<PackageName, FxHashMap<Version, File>>,
state: &State<PubGrubPackage, Range<PubGrubVersion>>,
) -> Self {
// TODO(charlie): petgraph is a really heavy and unnecessary dependency here. We should
@ -95,7 +96,8 @@ impl Graph {
let mut graph = petgraph::graph::Graph::with_capacity(selection.len(), selection.len());
// Add every package to the graph.
let mut inverse = HashMap::with_capacity(selection.len());
let mut inverse =
FxHashMap::with_capacity_and_hasher(selection.len(), BuildHasherDefault::default());
for (package, version) in selection {
let PubGrubPackage::Package(package_name, None) = package else {
continue;

View file

@ -2,7 +2,6 @@
use std::borrow::Borrow;
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::str::FromStr;
use std::sync::Arc;
@ -10,6 +9,7 @@ use anyhow::Result;
use futures::channel::mpsc::UnboundedReceiver;
use futures::future::Either;
use futures::{pin_mut, FutureExt, StreamExt, TryFutureExt};
use fxhash::{FxHashMap, FxHashSet};
use pubgrub::error::PubGrubError;
use pubgrub::range::Range;
use pubgrub::solver::{Incompatibility, State};
@ -104,14 +104,14 @@ impl<'a> Resolver<'a> {
let root = PubGrubPackage::Root;
// Keep track of the packages for which we've requested metadata.
let mut requested_packages = HashSet::new();
let mut requested_versions = HashSet::new();
let mut pins = HashMap::new();
let mut requested_packages = FxHashSet::default();
let mut requested_versions = FxHashSet::default();
let mut pins = FxHashMap::default();
// Start the solve.
let mut state = State::init(root.clone(), MIN_VERSION.clone());
let mut added_dependencies: HashMap<PubGrubPackage, HashSet<PubGrubVersion>> =
HashMap::default();
let mut added_dependencies: FxHashMap<PubGrubPackage, FxHashSet<PubGrubVersion>> =
FxHashMap::default();
let mut next = root;
loop {
@ -243,8 +243,8 @@ impl<'a> Resolver<'a> {
async fn choose_package_version<T: Borrow<PubGrubPackage>, U: Borrow<Range<PubGrubVersion>>>(
&self,
mut potential_packages: Vec<(T, U)>,
pins: &mut HashMap<PackageName, HashMap<pep440_rs::Version, File>>,
in_flight: &mut HashSet<String>,
pins: &mut FxHashMap<PackageName, FxHashMap<pep440_rs::Version, File>>,
in_flight: &mut FxHashSet<String>,
request_sink: &futures::channel::mpsc::UnboundedSender<Request>,
) -> Result<(T, Option<PubGrubVersion>), ResolveError> {
let mut selection = 0usize;
@ -373,8 +373,8 @@ impl<'a> Resolver<'a> {
&self,
package: &PubGrubPackage,
version: &PubGrubVersion,
pins: &mut HashMap<PackageName, HashMap<pep440_rs::Version, File>>,
requested_packages: &mut HashSet<PackageName>,
pins: &mut FxHashMap<PackageName, FxHashMap<pep440_rs::Version, File>>,
requested_packages: &mut FxHashSet<PackageName>,
request_sink: &futures::channel::mpsc::UnboundedSender<Request>,
) -> Result<Dependencies, ResolveError> {
match package {

View file

@ -2,12 +2,13 @@
//!
//! This is similar to running `pip install` with the `--no-deps` flag.
use std::collections::BTreeMap;
use std::hash::BuildHasherDefault;
use std::str::FromStr;
use anyhow::Result;
use futures::future::Either;
use futures::{StreamExt, TryFutureExt};
use fxhash::FxHashMap;
use tracing::debug;
use pep440_rs::Version;
@ -81,7 +82,8 @@ impl<'a> WheelFinder<'a> {
}
// Resolve the requirements.
let mut resolution: BTreeMap<PackageName, PinnedPackage> = BTreeMap::new();
let mut resolution: FxHashMap<PackageName, PinnedPackage> =
FxHashMap::with_capacity_and_hasher(requirements.len(), BuildHasherDefault::default());
while let Some(chunk) = package_stream.next().await {
for result in chunk {