change global allocator to jemalloc (and mimalloc on Windows) (#399)

This copies the allocator configuration used in the Ruff project. In
particular, this gives us an instant 10% win when resolving the top 1K
PyPI packages:

    $ hyperfine \
"./target/profiling/puffin-dev-main resolve-many --cache-dir
cache-docker-no-build --no-build pypi_top_8k_flat.txt --limit 1000 2>
/dev/null" \
"./target/profiling/puffin-dev resolve-many --cache-dir
cache-docker-no-build --no-build pypi_top_8k_flat.txt --limit 1000 2>
/dev/null"
Benchmark 1: ./target/profiling/puffin-dev-main resolve-many --cache-dir
cache-docker-no-build --no-build pypi_top_8k_flat.txt --limit 1000 2>
/dev/null
Time (mean ± σ): 974.2 ms ± 26.4 ms [User: 17503.3 ms, System: 2205.3
ms]
      Range (min … max):   943.5 ms … 1015.9 ms    10 runs

Benchmark 2: ./target/profiling/puffin-dev resolve-many --cache-dir
cache-docker-no-build --no-build pypi_top_8k_flat.txt --limit 1000 2>
/dev/null
Time (mean ± σ): 883.1 ms ± 23.3 ms [User: 14626.1 ms, System: 2542.2
ms]
      Range (min … max):   849.5 ms … 916.9 ms    10 runs

    Summary
'./target/profiling/puffin-dev resolve-many --cache-dir
cache-docker-no-build --no-build pypi_top_8k_flat.txt --limit 1000 2>
/dev/null' ran
1.10 ± 0.04 times faster than './target/profiling/puffin-dev-main
resolve-many --cache-dir cache-docker-no-build --no-build
pypi_top_8k_flat.txt --limit 1000 2> /dev/null'

I was moved to do this because I noticed `malloc`/`free` taking up a
fairly sizeable percentage of time during light profiling.

As is becoming a pattern, it will be easier to review this
commit-by-commit.

Ref #396 (wouldn't call this issue fixed)

-----

I did also try adding a `smallvec` optimization to the
`Version::release` field, but it didn't bare any fruit. I still think
there is more to explore since the results I observed don't quite line
up with what I expect. (So probably either my mental model is off or my
measurement process is flawed.) You can see that attempt with a little
more explanation here:
f9528b4ecd

In the course of adding the `smallvec` optimization, I also shrunk the
`Version` fields from a `usize` to a `u32`. They should at least be a
fixed size integer since version numbers aren't used to index memory,
and I shrunk it to `u32` since it seems reasonable to assume that all
version numbers will be smaller than `2^32`.
This commit is contained in:
Andrew Gallant 2023-11-10 14:48:59 -05:00 committed by GitHub
parent d8408b1783
commit 63f7f65190
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 129 additions and 44 deletions

43
Cargo.lock generated
View file

@ -1646,6 +1646,16 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "libmimalloc-sys"
version = "0.1.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3979b5c37ece694f1f5e51e7ecc871fdb0f517ed04ee45f88d15d6d553cb9664"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "libredox"
version = "0.0.1"
@ -1805,6 +1815,15 @@ dependencies = [
"syn 2.0.39",
]
[[package]]
name = "mimalloc"
version = "0.1.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa01922b5ea280a911e323e4d2fd24b7fe5cc4042e0d2cda3c40775cdc4bdc9c"
dependencies = [
"libmimalloc-sys",
]
[[package]]
name = "mime"
version = "0.3.17"
@ -2332,6 +2351,7 @@ dependencies = [
"install-wheel-rs",
"itertools 0.11.0",
"miette",
"mimalloc",
"pep440_rs 0.3.12",
"pep508_rs",
"platform-host",
@ -2351,6 +2371,7 @@ dependencies = [
"requirements-txt",
"tempfile",
"thiserror",
"tikv-jemallocator",
"tokio",
"toml 0.8.8",
"tracing",
@ -2403,6 +2424,7 @@ dependencies = [
"gourgeist",
"indicatif",
"itertools 0.11.0",
"mimalloc",
"pep508_rs",
"platform-host",
"platform-tags",
@ -2413,6 +2435,7 @@ dependencies = [
"puffin-traits",
"pypi-types",
"tempfile",
"tikv-jemallocator",
"tokio",
"tracing",
"tracing-indicatif",
@ -3535,6 +3558,26 @@ dependencies = [
"once_cell",
]
[[package]]
name = "tikv-jemalloc-sys"
version = "0.5.4+5.3.0-patched"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9402443cb8fd499b6f327e40565234ff34dbda27460c5b47db0db77443dd85d1"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "tikv-jemallocator"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "965fe0c26be5c56c94e38ba547249074803efd52adfb66de62107d95aab3eaca"
dependencies = [
"libc",
"tikv-jemalloc-sys",
]
[[package]]
name = "time"
version = "0.3.30"

View file

@ -213,7 +213,7 @@ pub enum LocalSegment {
/// Not-parseable as integer segment of local version
String(String),
/// Inferred integer segment of local version
Number(usize),
Number(u32),
}
impl Display for LocalSegment {
@ -236,7 +236,7 @@ impl FromStr for LocalSegment {
type Err = ();
fn from_str(segment: &str) -> Result<Self, Self::Err> {
Ok(if let Ok(number) = segment.parse::<usize>() {
Ok(if let Ok(number) = segment.parse::<u32>() {
Self::Number(number)
} else {
// "and if a segment contains any ASCII letters then that segment is compared lexicographically with case insensitivity"
@ -263,25 +263,25 @@ impl FromStr for LocalSegment {
pub struct Version {
/// The [versioning epoch](https://peps.python.org/pep-0440/#version-epochs). Normally just 0,
/// but you can increment it if you switched the versioning scheme.
pub epoch: usize,
pub epoch: u32,
/// The normal number part of the version
/// (["final release"](https://peps.python.org/pep-0440/#final-releases)),
/// such a `1.2.3` in `4!1.2.3-a8.post9.dev1`
///
/// Note that we drop the * placeholder by moving it to `Operator`
pub release: Vec<usize>,
pub release: Vec<u32>,
/// The [prerelease](https://peps.python.org/pep-0440/#pre-releases), i.e. alpha, beta or rc
/// plus a number
///
/// Note that whether this is Some influences the version
/// range matching since normally we exclude all prerelease versions
pub pre: Option<(PreRelease, usize)>,
pub pre: Option<(PreRelease, u32)>,
/// The [Post release version](https://peps.python.org/pep-0440/#post-releases),
/// higher post version are preferred over lower post or none-post versions
pub post: Option<usize>,
pub post: Option<u32>,
/// The [developmental release](https://peps.python.org/pep-0440/#developmental-releases),
/// if any
pub dev: Option<usize>,
pub dev: Option<u32>,
/// A [local version identifier](https://peps.python.org/pep-0440/#local-version-identifiers)
/// such as `+deadbeef` in `1.2.3+deadbeef`
///
@ -318,7 +318,7 @@ impl PyVersion {
/// The [versioning epoch](https://peps.python.org/pep-0440/#version-epochs). Normally just 0,
/// but you can increment it if you switched the versioning scheme.
#[getter]
pub fn epoch(&self) -> usize {
pub fn epoch(&self) -> u32 {
self.0.epoch
}
/// The normal number part of the version
@ -327,7 +327,7 @@ impl PyVersion {
///
/// Note that we drop the * placeholder by moving it to `Operator`
#[getter]
pub fn release(&self) -> Vec<usize> {
pub fn release(&self) -> Vec<u32> {
self.0.release.clone()
}
/// The [prerelease](https://peps.python.org/pep-0440/#pre-releases), i.e. alpha, beta or rc
@ -336,36 +336,36 @@ impl PyVersion {
/// Note that whether this is Some influences the version
/// range matching since normally we exclude all prerelease versions
#[getter]
pub fn pre(&self) -> Option<(PreRelease, usize)> {
pub fn pre(&self) -> Option<(PreRelease, u32)> {
self.0.pre.clone()
}
/// The [Post release version](https://peps.python.org/pep-0440/#post-releases),
/// higher post version are preferred over lower post or none-post versions
#[getter]
pub fn post(&self) -> Option<usize> {
pub fn post(&self) -> Option<u32> {
self.0.post
}
/// The [developmental release](https://peps.python.org/pep-0440/#developmental-releases),
/// if any
#[getter]
pub fn dev(&self) -> Option<usize> {
pub fn dev(&self) -> Option<u32> {
self.0.dev
}
/// The first item of release or 0 if unavailable.
#[getter]
#[allow(clippy::get_first)]
pub fn major(&self) -> usize {
self.release().get(0).copied().unwrap_or_default()
pub fn major(&self) -> u32 {
self.0.release.get(0).copied().unwrap_or_default()
}
/// The second item of release or 0 if unavailable.
#[getter]
pub fn minor(&self) -> usize {
self.release().get(1).copied().unwrap_or_default()
pub fn minor(&self) -> u32 {
self.0.release.get(1).copied().unwrap_or_default()
}
/// The third item of release or 0 if unavailable.
#[getter]
pub fn micro(&self) -> usize {
self.release().get(2).copied().unwrap_or_default()
pub fn micro(&self) -> u32 {
self.0.release.get(2).copied().unwrap_or_default()
}
/// Parses a PEP 440 version string
@ -442,7 +442,7 @@ impl Serialize for Version {
impl Version {
/// Constructor for a version that is just a release such as `3.8`
pub fn from_release(release: Vec<usize>) -> Self {
pub fn from_release(release: Vec<u32>) -> Self {
Self {
epoch: 0,
release,
@ -535,7 +535,7 @@ impl Display for Version {
/// Compare the release parts of two versions, e.g. `4.3.1` > `4.2`, `1.1.0` == `1.1` and
/// `1.16` < `1.19`
pub(crate) fn compare_release(this: &[usize], other: &[usize]) -> Ordering {
pub(crate) fn compare_release(this: &[u32], other: &[u32]) -> Ordering {
// "When comparing release segments with different numbers of components, the shorter segment
// is padded out with additional zeros as necessary"
for (this, other) in this.iter().chain(iter::repeat(&0)).zip(
@ -568,9 +568,7 @@ pub(crate) fn compare_release(this: &[usize], other: &[usize]) -> Ordering {
/// For post, any number is better than none (so None defaults to None<0), but for dev, no number
/// is better (so None default to the maximum). For local the Option<Vec<T>> luckily already has the
/// correct default Ord implementation
fn sortable_tuple(
version: &Version,
) -> (usize, usize, Option<usize>, usize, Option<&[LocalSegment]>) {
fn sortable_tuple(version: &Version) -> (u32, u32, Option<u32>, u32, Option<&[LocalSegment]>) {
match (&version.pre, &version.post, &version.dev) {
// dev release
(None, None, Some(n)) => (0, 0, None, *n, version.local.as_deref()),
@ -579,7 +577,7 @@ fn sortable_tuple(
1,
*n,
*post,
dev.unwrap_or(usize::MAX),
dev.unwrap_or(u32::MAX),
version.local.as_deref(),
),
// beta release
@ -587,7 +585,7 @@ fn sortable_tuple(
2,
*n,
*post,
dev.unwrap_or(usize::MAX),
dev.unwrap_or(u32::MAX),
version.local.as_deref(),
),
// alpha release
@ -595,7 +593,7 @@ fn sortable_tuple(
3,
*n,
*post,
dev.unwrap_or(usize::MAX),
dev.unwrap_or(u32::MAX),
version.local.as_deref(),
),
// final release
@ -605,7 +603,7 @@ fn sortable_tuple(
5,
0,
Some(*post),
dev.unwrap_or(usize::MAX),
dev.unwrap_or(u32::MAX),
version.local.as_deref(),
),
}
@ -720,7 +718,7 @@ impl Version {
pub(crate) fn parse_impl(captures: &Captures) -> Result<(Version, bool), String> {
let number_field = |field_name| {
if let Some(field_str) = captures.name(field_name) {
match field_str.as_str().parse::<usize>() {
match field_str.as_str().parse::<u32>() {
Ok(number) => Ok(Some(number)),
// Should be already forbidden by the regex
Err(err) => Err(format!(
@ -771,7 +769,7 @@ impl Version {
.as_str()
.split(&['-', '_', '.'][..])
.map(|segment| {
if let Ok(number) = segment.parse::<usize>() {
if let Ok(number) = segment.parse::<u32>() {
LocalSegment::Number(number)
} else {
// "and if a segment contains any ASCII letters then that segment is compared lexicographically with case insensitivity"
@ -786,8 +784,8 @@ impl Version {
.ok_or_else(|| "No release in version".to_string())?
.as_str()
.split('.')
.map(|segment| segment.parse::<usize>().map_err(|err| err.to_string()))
.collect::<Result<Vec<usize>, String>>()?;
.map(|segment| segment.parse::<u32>().map_err(|err| err.to_string()))
.collect::<Result<Vec<u32>, String>>()?;
let star = captures.name("trailing_dot_star").is_some();
if star {

View file

@ -907,9 +907,9 @@ mod tests {
fn error_empty() {
assert_err(
"",
indoc! {"
indoc! {"\
Empty field is not allowed for PEP508
^"
},
);
@ -1123,7 +1123,7 @@ mod tests {
"numpy ( >=1.19 ",
indoc! {"
Missing closing parenthesis (expected ')', found end of dependency specification)
numpy ( >=1.19
numpy ( >=1.19\x20
^"
},
);
@ -1212,9 +1212,9 @@ mod tests {
fn error_marker_incomplete2() {
assert_err(
r#"numpy; sys_platform == "#,
indoc! {"
indoc! {"\
Expected marker value, found end of dependency specification
numpy; sys_platform ==
numpy; sys_platform ==\x20
^"
},
);
@ -1224,10 +1224,10 @@ mod tests {
fn error_marker_incomplete3() {
assert_err(
r#"numpy; sys_platform == "win32" or "#,
indoc! {r#"
indoc! {"
Expected marker value, found end of dependency specification
numpy; sys_platform == "win32" or
^"#},
numpy; sys_platform == \"win32\" or\x20
^"},
);
}
@ -1246,10 +1246,10 @@ mod tests {
fn error_marker_incomplete5() {
assert_err(
r#"numpy; sys_platform == "win32" or (os_name == "linux" and "#,
indoc! {r#"
indoc! {"
Expected marker value, found end of dependency specification
numpy; sys_platform == "win32" or (os_name == "linux" and
^"#},
numpy; sys_platform == \"win32\" or (os_name == \"linux\" and\x20
^"},
);
}
@ -1331,7 +1331,7 @@ mod tests {
r#"name @ "#,
indoc! {"
Expected URL
name @
name @\x20
^"
},
);

View file

@ -54,6 +54,12 @@ tracing-tree = { workspace = true }
url = { workspace = true }
which = { workspace = true }
[target.'cfg(target_os = "windows")'.dependencies]
mimalloc = "0.1.39"
[target.'cfg(all(not(target_os = "windows"), not(target_os = "openbsd"), any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "powerpc64")))'.dependencies]
tikv-jemallocator = "0.5.0"
[dev-dependencies]
assert_cmd = { version = "2.0.12" }
assert_fs = { version = "1.0.13" }

View file

@ -18,6 +18,22 @@ use crate::index_urls::IndexUrls;
use crate::python_version::PythonVersion;
use crate::requirements::RequirementsSource;
#[cfg(target_os = "windows")]
#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
#[cfg(all(
not(target_os = "windows"),
not(target_os = "openbsd"),
any(
target_arch = "x86_64",
target_arch = "aarch64",
target_arch = "powerpc64"
)
))]
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
mod commands;
mod index_urls;
mod logging;

View file

@ -38,3 +38,9 @@ tracing-indicatif = { workspace = true }
tracing-subscriber = { workspace = true }
which = { workspace = true }
url = { workspace = true }
[target.'cfg(target_os = "windows")'.dependencies]
mimalloc = "0.1.39"
[target.'cfg(all(not(target_os = "windows"), not(target_os = "openbsd"), any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "powerpc64")))'.dependencies]
tikv-jemallocator = "0.5.0"

View file

@ -18,6 +18,22 @@ use crate::build::{build, BuildArgs};
use crate::resolve_cli::ResolveCliArgs;
use crate::wheel_metadata::WheelMetadataArgs;
#[cfg(target_os = "windows")]
#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
#[cfg(all(
not(target_os = "windows"),
not(target_os = "openbsd"),
any(
target_arch = "x86_64",
target_arch = "aarch64",
target_arch = "powerpc64"
)
))]
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
mod build;
mod resolve_cli;
mod resolve_many;

View file

@ -71,7 +71,7 @@ impl TryFrom<&VersionSpecifier> for PubGrubSpecifier {
} else if let Some(post) = low.post {
low.post = Some(post + 1);
} else {
low.post = Some(usize::MAX);
low.post = Some(u32::MAX);
}
let version = PubGrubVersion::from(specifier.version().clone());
Range::strictly_higher_than(version)

0
scripts/resolve/get_pypi_top_8k.sh Normal file → Executable file
View file