fix(cli): configure v8 isolate with cgroups-constrained memory limit (#29078)

This change configures V8 isolates to respect memory limits imposed by
cgroups on Linux.

It adds support for detecting both cgroups v1 and v2 memory limits,
enabling Deno to properly adapt to containerized environments with
memory constraints. When cgroups information is unavailable or not
applicable, it falls back to using the system's total memory as before.

Closes #29077

## Test

For testing, I created an Ubuntu VM with 1 GiB of memory. Within this VM, I
set up a cgroup with a 512 MiB memory limit, then ran the following script to
see what heap size limit the V8 isolate was given.

```js
import * as v8 from "node:v8";

console.log(v8.getHeapStatistics());
```

### Ubuntu 20.04

In this version of Ubuntu, the hybrid cgroup mode (v1 and v2 hierarchies mounted side by side) is enabled by default.

```
$ cat /proc/self/cgroup
12:rdma:/
11:blkio:/user.slice
10:devices:/user.slice
9:cpu,cpuacct:/user.slice
8:pids:/user.slice/user-1000.slice/session-3.scope
7:memory:/user.slice/user-1000.slice/session-3.scope
6:perf_event:/
5:freezer:/
4:net_cls,net_prio:/
3:hugetlb:/
2:cpuset:/
1:name=systemd:/user.slice/user-1000.slice/session-3.scope
0::/user.slice/user-1000.slice/session-3.scope
```

Create a new cgroup with a 512 MiB memory limit and run the above script in
this cgroup:

```
$ sudo cgcreate -g memory:/mygroup
$ sudo cgset -r memory.limit_in_bytes=$((512 * 1024 * 1024)) mygroup
$ sudo cgexec -g memory:mygroup ./deno run main.mjs
{
  total_heap_size: 7745536,
  total_heap_size_executable: 0,
  total_physical_size: 7090176,
  total_available_size: 266348216,
  used_heap_size: 6276752,
  heap_size_limit: 271581184,
  malloced_memory: 303200,
  peak_malloced_memory: 140456,
  does_zap_garbage: 0,
  number_of_native_contexts: 1,
  number_of_detached_contexts: 0,
  total_global_handles_size: 24576,
  used_global_handles_size: 22432,
  external_memory: 3232012
}
```

This indicates that the isolate was informed of the cgroup-constrained
memory limit (512 MiB) and hence got a heap limit of about 270 MB.

### Ubuntu 22.04

In this version of Ubuntu, cgroup v2 is used.

```
$ cat /proc/self/cgroup
0::/user.slice/user-1000.slice/session-3.scope
```

Run the above script using `systemd-run`:

```
$ sudo systemd-run --property=MemoryMax=512M --pty bash -c '/home/ubuntu/deno run /home/ubuntu/main.mjs'
{
  total_heap_size: 7745536,
  total_heap_size_executable: 0,
  total_physical_size: 7090176,
  total_available_size: 266348184,
  used_heap_size: 6276784,
  heap_size_limit: 271581184,
  malloced_memory: 303200,
  peak_malloced_memory: 140456,
  does_zap_garbage: 0,
  number_of_native_contexts: 1,
  number_of_detached_contexts: 0,
  total_global_handles_size: 24576,
  used_global_handles_size: 22432,
  external_memory: 3232012
}
```

Again, the isolate correctly got a heap limit of about 270 MB.
Note that it would have had a larger heap limit if the entire system
memory, i.e. 1 GiB, had been passed to V8. In fact, if we run the same
script outside the cgroup, it does display a larger `heap_size_limit`, as
shown below:

```
$ ./deno run main.mjs
{
  total_heap_size: 7745536,
  total_heap_size_executable: 0,
  total_physical_size: 7090176,
  total_available_size: 546580152,
  used_heap_size: 6276752,
  heap_size_limit: 551813120,
  malloced_memory: 303200,
  peak_malloced_memory: 140456,
  does_zap_garbage: 0,
  number_of_native_contexts: 1,
  number_of_detached_contexts: 0,
  total_global_handles_size: 24576,
  used_global_handles_size: 22432,
  external_memory: 3232012
}
```

---------

Signed-off-by: Yusuke Tanaka <wing0920@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Yusuke Tanaka 2025-05-14 00:07:59 +09:00 committed by GitHub
parent 6002d2624e
commit 9b2b1c41f5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 172 additions and 13 deletions

View file

@ -1213,6 +1213,7 @@ static ENV_VARIABLES_HELP: &str = cstr!(
<g>DENO_TLS_CA_STORE</> Comma-separated list of order dependent certificate stores.
Possible values: "system", "mozilla" <p(245)>(defaults to "mozilla")</>
<g>DENO_TRACE_PERMISSIONS</> Environmental variable to enable stack traces in permission prompts.
<g>DENO_USE_CGROUPS</> Use cgroups to determine V8 memory limit
<g>FORCE_COLOR</> Set force color output even if stdout isn't a tty
<g>HTTP_PROXY</> Proxy address for HTTP requests
<p(245)>(module downloads, fetch)</>

View file

@ -128,13 +128,169 @@ pub fn get_cache_storage_dir() -> PathBuf {
/// By default V8 uses 1.4Gb heap limit which is meant for browser tabs.
/// Instead probe for the total memory on the system and use it instead
/// as a default.
pub fn create_isolate_create_params() -> Option<v8::CreateParams> {
let maybe_mem_info = deno_runtime::deno_os::sys_info::mem_info();
maybe_mem_info.map(|mem_info| {
v8::CreateParams::default()
.heap_limits_from_system_memory(mem_info.total, 0)
})
/// as a default. In case the platform is Linux and `DENO_USE_CGROUPS` is set,
/// parse cgroup config to get the cgroup-constrained memory limit.
pub fn create_isolate_create_params<TSys: DenoLibSys>(
  // Only the Linux/Android path below reads `sys` (to look up the
  // cgroup-constrained memory limit); on other targets it is unused.
  #[allow(unused_variables)] sys: &TSys,
) -> Option<v8::CreateParams> {
  // Select the memory figure for the current target: the cgroup-aware limit
  // on Linux/Android, the plain system total everywhere else.
  #[cfg(any(target_os = "android", target_os = "linux"))]
  let total_memory = linux::get_memory_limit(sys);
  #[cfg(not(any(target_os = "android", target_os = "linux")))]
  let total_memory = deno_runtime::deno_os::sys_info::mem_info()
    .map(|mem_info| mem_info.total);

  // No memory information means no custom params; the caller gets `None`.
  let memory_limit = total_memory?;
  Some(
    v8::CreateParams::default()
      .heap_limits_from_system_memory(memory_limit, 0),
  )
}
#[cfg(any(target_os = "android", target_os = "linux"))]
mod linux {
/// Get memory limit with cgroup (either v1 or v2) taken into account.
///
/// Returns the system's total memory unless `DENO_USE_CGROUPS` is set, in
/// which case `/proc/self/cgroup` is parsed to locate the memory limit file
/// of the current cgroup. Any failure along the way (unreadable files, a
/// value that does not parse as `u64` such as cgroup v2's literal `max`)
/// falls back to the system's total memory.
///
/// The cgroup limit is clamped to the system's total memory: a cgroup may
/// advertise a limit larger than physical memory (cgroup v1 reports an
/// unlimited group as a very large number), and such a value must not be
/// handed to V8 as if it were available memory.
///
/// Returns `None` only when neither a cgroup limit nor the system total is
/// available.
pub(super) fn get_memory_limit<TSys: crate::sys::DenoLibSys>(
  sys: &TSys,
) -> Option<u64> {
  let system_total_memory = deno_runtime::deno_os::sys_info::mem_info()
    .map(|mem_info| mem_info.total);

  // For performance, parse cgroup config only when DENO_USE_CGROUPS is set
  if std::env::var("DENO_USE_CGROUPS").is_err() {
    return system_total_memory;
  }

  let Ok(self_cgroup) = sys.fs_read_to_string("/proc/self/cgroup") else {
    return system_total_memory;
  };

  // Both versions expose the limit as a single number in a file under the
  // cgroup's directory; only the mount point and the file name differ.
  let limit_path = match parse_self_cgroup(&self_cgroup) {
    CgroupVersion::V1 { cgroup_relpath } => {
      std::path::Path::new("/sys/fs/cgroup/memory")
        .join(cgroup_relpath)
        .join("memory.limit_in_bytes")
    }
    CgroupVersion::V2 { cgroup_relpath } => {
      std::path::Path::new("/sys/fs/cgroup")
        .join(cgroup_relpath)
        .join("memory.max")
    }
    CgroupVersion::None => return system_total_memory,
  };

  let cgroup_limit = sys
    .fs_read_to_string(limit_path)
    .ok()
    .and_then(|s| s.trim().parse::<u64>().ok());

  match (cgroup_limit, system_total_memory) {
    // Never report more memory than the machine actually has.
    (Some(limit), Some(total)) => Some(limit.min(total)),
    (limit, total) => limit.or(total),
  }
}
/// Which cgroup hierarchy governs memory for the current process, together
/// with the cgroup's path relative to that hierarchy's mount point (leading
/// `/` removed).
enum CgroupVersion<'a> {
  /// A cgroup v1 hierarchy with the `memory` controller attached.
  V1 { cgroup_relpath: &'a str },
  /// The cgroup v2 (unified) hierarchy, reported as a `0::<path>` entry.
  V2 { cgroup_relpath: &'a str },
  /// No usable entry was found.
  None,
}

/// Parses the content of `/proc/self/cgroup`.
///
/// Each line has the form `<hierarchy_id>:<controller_list>:<cgroup_path>`.
/// Only the first two colons are delimiters, so a cgroup path that itself
/// contains `:` is kept intact. `<controller_list>` is a comma-separated
/// list; a line whose list contains `memory` selects cgroup v1 and wins over
/// any `0::` (v2) line, because in hybrid mode memory is still managed by
/// v1. Lines that do not have three fields are ignored.
fn parse_self_cgroup(self_cgroup_content: &str) -> CgroupVersion<'_> {
  // Stays `None` unless a matching line is found below.
  let mut cgroup_version = CgroupVersion::None;

  for line in self_cgroup_content.lines() {
    // Split on the first two colons only; the remainder is the path.
    let mut fields = line.splitn(3, ':');
    let (Some(hierarchy_id), Some(controllers), Some(cgroup_path)) =
      (fields.next(), fields.next(), fields.next())
    else {
      continue;
    };
    let cgroup_relpath = cgroup_path.strip_prefix('/').unwrap_or(cgroup_path);

    // A v1 hierarchy carrying the memory controller (possibly co-mounted
    // with other controllers) explicitly manages memory; nothing further
    // needs to be checked.
    if controllers.split(',').any(|controller| controller == "memory") {
      return CgroupVersion::V1 { cgroup_relpath };
    }

    // "0::<path>" indicates cgroup v2. In hybrid mode memory might still be
    // managed by v1, so keep scanning the remaining lines.
    if hierarchy_id == "0" && controllers.is_empty() {
      cgroup_version = CgroupVersion::V2 { cgroup_relpath };
    }
  }

  cgroup_version
}
#[test]
fn test_parse_self_cgroup_v2() {
  // A lone "0::<path>" entry is what a pure cgroup v2 host reports.
  let content = "0::/user.slice/user-1000.slice/session-3.scope";
  let expected_relpath = "user.slice/user-1000.slice/session-3.scope";

  let is_expected_v2 = matches!(
    parse_self_cgroup(content),
    CgroupVersion::V2 { cgroup_relpath } if cgroup_relpath == expected_relpath
  );
  assert!(is_expected_v2);
}
#[test]
fn test_parse_self_cgroup_hybrid() {
  // Hybrid layout: v1 controller hierarchies plus a trailing "0::" v2 entry.
  // The v1 `memory` line must take precedence over the v2 one.
  let content = r#"12:rdma:/
11:blkio:/user.slice
10:devices:/user.slice
9:cpu,cpuacct:/user.slice
8:pids:/user.slice/user-1000.slice/session-3.scope
7:memory:/user.slice/user-1000.slice/session-3.scope
6:perf_event:/
5:freezer:/
4:net_cls,net_prio:/
3:hugetlb:/
2:cpuset:/
1:name=systemd:/user.slice/user-1000.slice/session-3.scope
0::/user.slice/user-1000.slice/session-3.scope
"#;
  let expected_relpath = "user.slice/user-1000.slice/session-3.scope";

  let is_expected_v1 = matches!(
    parse_self_cgroup(content),
    CgroupVersion::V1 { cgroup_relpath } if cgroup_relpath == expected_relpath
  );
  assert!(is_expected_v1);
}
#[test]
fn test_parse_self_cgroup_v1() {
  // Pure cgroup v1 layout where the process sits in the root memory cgroup,
  // so the relative path is empty once the leading "/" is stripped.
  let content = r#"11:hugetlb:/
10:pids:/user.slice/user-1000.slice
9:perf_event:/
8:devices:/user.slice
7:net_cls,net_prio:/
6:memory:/
5:blkio:/
4:cpuset:/
3:cpu,cpuacct:/
2:freezer:/
1:name=systemd:/user.slice/user-1000.slice/session-2.scope
"#;

  let is_root_v1 = matches!(
    parse_self_cgroup(content),
    CgroupVersion::V1 { cgroup_relpath } if cgroup_relpath.is_empty()
  );
  assert!(is_root_v1);
}
}
#[derive(Debug, thiserror::Error, deno_error::JsError)]
@ -331,7 +487,7 @@ impl<TSys: DenoLibSys> LibWorkerFactorySharedState<TSys> {
},
extensions: vec![],
startup_snapshot: shared.options.startup_snapshot,
create_params: create_isolate_create_params(),
create_params: create_isolate_create_params(&shared.sys),
unsafely_ignore_certificate_errors: shared
.options
.unsafely_ignore_certificate_errors
@ -508,7 +664,7 @@ impl<TSys: DenoLibSys> LibMainWorkerFactory<TSys> {
},
extensions: custom_extensions,
startup_snapshot: shared.options.startup_snapshot,
create_params: create_isolate_create_params(),
create_params: create_isolate_create_params(&shared.sys),
unsafely_ignore_certificate_errors: shared
.options
.unsafely_ignore_certificate_errors

View file

@ -3921,7 +3921,7 @@ impl Inner {
}
#[cfg_attr(feature = "lsp-tracing", tracing::instrument(skip_all))]
fn project_changed<'a>(
fn project_changed(
&mut self,
changed_specifiers: impl IntoIterator<Item = (Arc<Url>, ChangeKind)>,
scopes_change: ProjectScopesChange,

View file

@ -5056,7 +5056,7 @@ fn run_tsc_thread(
));
let mut tsc_runtime = JsRuntime::new(RuntimeOptions {
extensions,
create_params: create_isolate_create_params(),
create_params: create_isolate_create_params(&crate::sys::CliSys::default()),
startup_snapshot: deno_snapshots::CLI_SNAPSHOT,
inspector: has_inspector_server,
..Default::default()

View file

@ -637,7 +637,9 @@ fn wait_for_start(
crate::sys::CliSys,
>(
startup_snapshot,
deno_lib::worker::create_isolate_create_params(),
deno_lib::worker::create_isolate_create_params(
&crate::sys::CliSys::default(),
),
Some(roots.shared_array_buffer_store.clone()),
Some(roots.compiled_wasm_module_store.clone()),
vec![],

View file

@ -1433,7 +1433,7 @@ pub fn exec(
});
let mut runtime = JsRuntime::new(RuntimeOptions {
extensions,
create_params: create_isolate_create_params(),
create_params: create_isolate_create_params(&crate::sys::CliSys::default()),
startup_snapshot: deno_snapshots::CLI_SNAPSHOT,
extension_code_cache,
..Default::default()