Set 4MB stack size for all threads, introduce UV_STACK_SIZE (#12839)

See #12769 for the motivation. We set the 4MB stack size not only for the
main thread, but also for all tokio and rayon threads, to fix a stack
overflow while unpacking wheels in production on Windows.

There are two variables for setting the stack size: A new
`UV_STACK_SIZE` that takes precedence, and the existing `RUST_MIN_STACK`.
When setting the stack size, `UV_STACK_SIZE` should be preferred, since
`RUST_MIN_STACK` affects all Rust applications, including build backends
we call (e.g., maturin). The minimum stack size is set to 1MB, the
lowest stack size we observed on a platform (Windows main thread).

Fixes #12769

## Test Plan

Tested manually with the example from #12769
This commit is contained in:
konsti 2025-04-16 09:27:46 +02:00 committed by GitHub
parent df35919d5a
commit 4d34b28657
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 113 additions and 51 deletions

View file

@ -15,10 +15,10 @@ pub use overrides::*;
pub use package_options::*;
pub use preview::*;
pub use project_build_backend::*;
pub use rayon::*;
pub use required_version::*;
pub use sources::*;
pub use target_triple::*;
pub use threading::*;
pub use trusted_host::*;
pub use trusted_publishing::*;
pub use vcs::*;
@ -40,10 +40,10 @@ mod overrides;
mod package_options;
mod preview;
mod project_build_backend;
mod rayon;
mod required_version;
mod sources;
mod target_triple;
mod threading;
mod trusted_host;
mod trusted_publishing;
mod vcs;

View file

@ -1,21 +0,0 @@
//! Initialize the rayon threadpool once, before we need it.
//!
//! The `uv` crate sets [`RAYON_PARALLELISM`] from the user settings, and the extract and install
//! code initialize the threadpool lazily only if they are actually used by calling
//! `LazyLock::force(&RAYON_INITIALIZE)`.
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::LazyLock;
/// Requested worker-thread count for the global rayon thread pool.
///
/// The `uv` crate stores the user-configured value here before the pool is built, and
/// [`RAYON_INITIALIZE`] reads it exactly once when it is first forced. The initial value of 0
/// makes rayon use its default.
pub static RAYON_PARALLELISM: AtomicUsize = AtomicUsize::new(0);
/// One-time, lazy setup of the global rayon thread pool.
///
/// Force this with `LazyLock::force(&RAYON_INITIALIZE)` before any code path that may be the
/// first to use rayon. The pool is built with the thread count currently stored in
/// [`RAYON_PARALLELISM`].
///
/// # Panics
///
/// Panics on first access if the global rayon pool cannot be built.
pub static RAYON_INITIALIZE: LazyLock<()> = LazyLock::new(|| {
    let threads = RAYON_PARALLELISM.load(Ordering::SeqCst);
    let pool = rayon::ThreadPoolBuilder::new().num_threads(threads);
    pool.build_global()
        .expect("failed to initialize global rayon pool");
});

View file

@ -0,0 +1,69 @@
//! Configure rayon and determine thread stack sizes.
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::LazyLock;
use uv_static::EnvVars;
/// The stack size, in bytes, that [`min_stack_size`] falls back to: 4MB (4194304).
///
/// It is used when neither `UV_STACK_SIZE` nor `RUST_MIN_STACK` yields a parseable value, and
/// when the requested value is below [`UV_MIN_STACK_SIZE`].
pub const UV_DEFAULT_STACK_SIZE: usize = 4 * 1024 * 1024;
/// The smallest stack size, in bytes, that may be requested through the environment: 1MB
/// (1048576). Requests below this value are not honored.
pub const UV_MIN_STACK_SIZE: usize = 1024 * 1024;
/// Determine the stack size, in bytes, that uv requests for the threads it creates.
///
/// The value is resolved in this order:
///
/// 1. `UV_STACK_SIZE`, if it is set and parses as a `usize`.
/// 2. `RUST_MIN_STACK`, if it is set and parses as a `usize`.
/// 3. [`UV_DEFAULT_STACK_SIZE`] (4MB).
///
/// A resolved value below [`UV_MIN_STACK_SIZE`] (1MB, the lowest platform default we observed)
/// is discarded and [`UV_DEFAULT_STACK_SIZE`] is returned instead.
///
/// # Background
///
/// Stack exhaustion has been a recurring problem for uv; boxing types and futures in various
/// places is one of the mitigations.
///
/// The main thread's stack size differs a LOT between platforms and is hard to control (which
/// is why Rust doesn't do so by default). macOS and Linux typically provide an 8MB main thread,
/// whereas Windows typically provides only 1MB, which is *tiny*:
/// <https://learn.microsoft.com/en-us/cpp/build/reference/stack-stack-allocations?view=msvc-170>
///
/// To even this out, uv spawns a new thread called main2 whose size it picks itself. 2MB is
/// typically too small (especially for debug builds), while 4MB seems fine.
///
/// Non-main threads should all get 2MB, since Rust enforces platform consistency there, yet
/// stack overflows can still happen in release mode
/// (<https://github.com/astral-sh/uv/issues/12769>), so rayon and tokio threads are given the
/// same stack size, with the 4MB default.
pub fn min_stack_size() -> usize {
    // A variable only counts if it is set, is valid Unicode, and parses as a `usize`.
    let read_size = |name: &str| {
        std::env::var(name)
            .ok()
            .and_then(|raw| raw.parse::<usize>().ok())
    };

    // `RUST_MIN_STACK` is only consulted when `UV_STACK_SIZE` did not yield a value.
    let requested = read_size(EnvVars::UV_STACK_SIZE)
        .or_else(|| read_size(EnvVars::RUST_MIN_STACK))
        .unwrap_or(UV_DEFAULT_STACK_SIZE);

    if requested >= UV_MIN_STACK_SIZE {
        requested
    } else {
        // Too small to be usable: ignore the request entirely.
        UV_DEFAULT_STACK_SIZE
    }
}
/// Requested worker-thread count for the global rayon thread pool.
///
/// The `uv` crate stores the user-configured value here before the pool is built, and
/// [`RAYON_INITIALIZE`] reads it exactly once when it is first forced. The initial value of 0
/// makes rayon use its default.
pub static RAYON_PARALLELISM: AtomicUsize = AtomicUsize::new(0);
/// One-time, lazy setup of the global rayon thread pool.
///
/// The `uv` crate writes the user's setting into [`RAYON_PARALLELISM`]; the extract and install
/// code then force this static with `LazyLock::force(&RAYON_INITIALIZE)` only if they actually
/// need rayon. Always force it before a code path that may be the first to use rayon.
///
/// Worker threads are created with the stack size returned by [`min_stack_size`].
///
/// # Panics
///
/// Panics on first access if the global rayon pool cannot be built.
pub static RAYON_INITIALIZE: LazyLock<()> = LazyLock::new(|| {
    let threads = RAYON_PARALLELISM.load(Ordering::SeqCst);
    let stack_size = min_stack_size();
    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(threads)
        .stack_size(stack_size);
    pool.build_global()
        .expect("failed to initialize global rayon pool");
});

View file

@ -598,11 +598,27 @@ impl EnvVars {
/// Use to set the stack size used by uv.
///
/// The value is in bytes, and the default is typically 2MB (2097152).
/// The value is in bytes, and if both `UV_STACK_SIZE` and `RUST_MIN_STACK` are unset, uv uses a
/// 4MB (4194304) stack. `UV_STACK_SIZE` takes precedence over `RUST_MIN_STACK`.
///
/// Unlike the normal `RUST_MIN_STACK` semantics, this can affect main thread
/// stack size, because we actually spawn our own main2 thread to work around
/// the fact that Windows' real main thread is only 1MB. That thread has size
/// `max(RUST_MIN_STACK, 4MB)`.
/// `max(UV_STACK_SIZE, 1MB)`.
pub const UV_STACK_SIZE: &'static str = "UV_STACK_SIZE";
/// Use to set the stack size used by uv.
///
/// The value is in bytes, and if both `UV_STACK_SIZE` and `RUST_MIN_STACK` are unset, uv uses a
/// 4MB (4194304) stack. `UV_STACK_SIZE` takes precedence over `RUST_MIN_STACK`.
///
/// Prefer setting `UV_STACK_SIZE`, since `RUST_MIN_STACK` also affects subprocesses, such as
/// build backends that use Rust code.
///
/// Unlike the normal `RUST_MIN_STACK` semantics, this can affect main thread
/// stack size, because we actually spawn our own main2 thread to work around
/// the fact that Windows' real main thread is only 1MB. Values below 1MB
/// (1048576) are ignored, and the 4MB default is used instead.
pub const RUST_MIN_STACK: &'static str = "RUST_MIN_STACK";
/// The directory containing the `Cargo.toml` manifest for a package.

View file

@ -26,6 +26,7 @@ use uv_cli::{
use uv_cli::{PythonCommand, PythonNamespace, ToolCommand, ToolNamespace, TopLevelArgs};
#[cfg(feature = "self-update")]
use uv_cli::{SelfCommand, SelfNamespace, SelfUpdateArgs};
use uv_configuration::min_stack_size;
use uv_fs::{Simplified, CWD};
use uv_pep508::VersionOrUrl;
use uv_pypi_types::{ParsedDirectoryUrl, ParsedUrl};
@ -2036,32 +2037,12 @@ where
}
};
// Running out of stack has been an issue for us. We box types and futures in various places
// to mitigate this, with this being an especially important case.
//
// Non-main threads should all have 2MB, as Rust forces platform consistency there,
// but that can be overridden with the RUST_MIN_STACK environment variable if you need more.
//
// Main thread stack-size is the real issue. There's BIG variety here across platforms
// and it's harder to control (which is why Rust doesn't by default). Notably
// on macOS and Linux you will typically get 8MB main thread, while on Windows you will
// typically get 1MB, which is *tiny*:
// https://learn.microsoft.com/en-us/cpp/build/reference/stack-stack-allocations?view=msvc-170
//
// To normalize this we just spawn a new thread called main2 with a size we can set
// ourselves. 2MB is typically too small (especially for our debug builds), while 4MB
// seems fine. Also we still try to respect RUST_MIN_STACK if it's set, in case useful,
// but don't let it ask for a smaller stack to avoid messy misconfiguration since we
// know we use quite a bit of main stack space.
let main_stack_size = std::env::var(EnvVars::RUST_MIN_STACK)
.ok()
.and_then(|var| var.parse::<usize>().ok())
.unwrap_or(0)
.max(4 * 1024 * 1024);
// See `min_stack_size` doc comment about `main2`
let min_stack_size = min_stack_size();
let main2 = move || {
let runtime = tokio::runtime::Builder::new_current_thread()
.enable_all()
.thread_stack_size(min_stack_size)
.build()
.expect("Failed building the Runtime");
// Box the large main future to avoid stack overflows.
@ -2076,7 +2057,7 @@ where
};
let result = std::thread::Builder::new()
.name("main2".to_owned())
.stack_size(main_stack_size)
.stack_size(min_stack_size)
.spawn(main2)
.expect("Tokio executor failed, was there a panic?")
.join()