perf(cli): Improve concurrency when setting up node_modules and loading cached npm package info (#24018)

The same issue in two different places - doing blocking FS work in an
async task, limiting the amount of work that happens concurrently.

- When setting up node_modules, where we try to set up entries
concurrently but were blocking other tasks from actually running.
- When loading package info from the npm registry file cache, loading
and deserializing is expensive and prevents concurrency. This was
especially noticeable when loading an npm resolution snapshot from a
lockfile (`snapshot_from_lockfile` in `deno_npm`).

Installing deps in `deno-docs`:

```
❯ hyperfine -i -p 'rm -rf node_modules/' '../d7/deno-main i' '../d7/target/release/deno i'
Benchmark 1: ../d7/deno-main i
  Time (mean ± σ):      2.193 s ±  0.027 s    [User: 0.589 s, System: 1.033 s]
  Range (min … max):    2.151 s …  2.242 s    10 runs

Benchmark 2: ../d7/target/release/deno i
  Time (mean ± σ):      1.597 s ±  0.021 s    [User: 0.977 s, System: 1.337 s]
  Range (min … max):    1.550 s …  1.627 s    10 runs

Summary
  ../d7/target/release/deno i ran
    1.37 ± 0.02 times faster than ../d7/deno-main i
```

Caching `npm:@11ty/eleventy`:
```
❯ hyperfine -i -p 'rm -rf node_modules/' --warmup 5 '../../d7/deno-main cache npm:@11ty/eleventy' '../../d7/target/release/deno cache npm:@11ty/eleventy'
Benchmark 1: ../../d7/deno-main cache npm:@11ty/eleventy
  Time (mean ± σ):     129.9 ms ±   2.2 ms    [User: 27.5 ms, System: 101.3 ms]
  Range (min … max):   127.5 ms … 135.8 ms    10 runs

Benchmark 2: ../../d7/target/release/deno cache npm:@11ty/eleventy
  Time (mean ± σ):     100.6 ms ±   1.3 ms    [User: 38.8 ms, System: 233.8 ms]
  Range (min … max):    99.3 ms … 103.2 ms    10 runs

Summary
  ../../d7/target/release/deno cache npm:@11ty/eleventy ran
    1.29 ± 0.03 times faster than ../../d7/deno-main cache npm:@11ty/eleventy
```

---------

Co-authored-by: David Sherret <dsherret@gmail.com>
This commit is contained in:
Nathan Whitaker 2024-05-28 14:17:36 -07:00 committed by GitHub
parent 7d8a8a0461
commit 2024c974b6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 45 additions and 28 deletions

View file

@ -142,24 +142,21 @@ impl CliNpmRegistryApiInner {
} }
Some(CacheItem::Pending(future)) => (false, future.clone()), Some(CacheItem::Pending(future)) => (false, future.clone()),
None => { None => {
if (self.cache.cache_setting().should_use_for_npm_package(name) && !self.force_reload())
// if this has been previously reloaded, then try loading from the
// file system cache
|| !self.previously_reloaded_packages.lock().insert(name.to_string())
{
// attempt to load from the file cache
if let Some(info) = self.load_file_cached_package_info(name) {
let result = Some(Arc::new(info));
mem_cache
.insert(name.to_string(), CacheItem::Resolved(result.clone()));
return Ok(result);
}
}
let future = { let future = {
let api = self.clone(); let api = self.clone();
let name = name.to_string(); let name = name.to_string();
async move { async move {
if (api.cache.cache_setting().should_use_for_npm_package(&name) && !api.force_reload())
// if this has been previously reloaded, then try loading from the
// file system cache
|| !api.previously_reloaded_packages.lock().insert(name.to_string())
{
// attempt to load from the file cache
if let Some(info) = api.load_file_cached_package_info(&name).await {
let result = Some(Arc::new(info));
return Ok(result);
}
}
api api
.load_package_info_from_registry(&name) .load_package_info_from_registry(&name)
.await .await
@ -201,11 +198,11 @@ impl CliNpmRegistryApiInner {
self.force_reload_flag.is_raised() self.force_reload_flag.is_raised()
} }
fn load_file_cached_package_info( async fn load_file_cached_package_info(
&self, &self,
name: &str, name: &str,
) -> Option<NpmPackageInfo> { ) -> Option<NpmPackageInfo> {
match self.load_file_cached_package_info_result(name) { match self.load_file_cached_package_info_result(name).await {
Ok(value) => value, Ok(value) => value,
Err(err) => { Err(err) => {
if cfg!(debug_assertions) { if cfg!(debug_assertions) {
@ -217,18 +214,25 @@ impl CliNpmRegistryApiInner {
} }
} }
fn load_file_cached_package_info_result( async fn load_file_cached_package_info_result(
&self, &self,
name: &str, name: &str,
) -> Result<Option<NpmPackageInfo>, AnyError> { ) -> Result<Option<NpmPackageInfo>, AnyError> {
let file_cache_path = self.get_package_file_cache_path(name); let file_cache_path = self.get_package_file_cache_path(name);
let file_text = match fs::read_to_string(file_cache_path) { let deserialization_result = deno_core::unsync::spawn_blocking(|| {
Ok(file_text) => file_text, let file_text = match fs::read_to_string(file_cache_path) {
Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None), Ok(file_text) => file_text,
Err(err) => return Err(err.into()), Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None),
}; Err(err) => return Err(err.into()),
match serde_json::from_str(&file_text) { };
Ok(package_info) => Ok(Some(package_info)), serde_json::from_str(&file_text)
.map(Some)
.map_err(AnyError::from)
})
.await
.unwrap();
match deserialization_result {
Ok(maybe_package_info) => Ok(maybe_package_info),
Err(err) => { Err(err) => {
// This scenario might mean we need to load more data from the // This scenario might mean we need to load more data from the
// npm registry than before. So, just debug log while in debug // npm registry than before. So, just debug log while in debug
@ -317,7 +321,10 @@ impl CliNpmRegistryApiInner {
.await?; .await?;
match maybe_bytes { match maybe_bytes {
Some(bytes) => { Some(bytes) => {
let package_info = serde_json::from_slice(&bytes)?; let package_info = deno_core::unsync::spawn_blocking(move || {
serde_json::from_slice(&bytes)
})
.await??;
self.save_package_info_to_file_cache(name, &package_info); self.save_package_info_to_file_cache(name, &package_info);
Ok(Some(package_info)) Ok(Some(package_info))
} }

View file

@ -332,9 +332,18 @@ async fn sync_resolution_with_fs(
join_package_name(&sub_node_modules, &package.id.nv.name); join_package_name(&sub_node_modules, &package.id.nv.name);
let cache_folder = let cache_folder =
cache.package_folder_for_name_and_version(&package.id.nv); cache.package_folder_for_name_and_version(&package.id.nv);
clone_dir_recursive(&cache_folder, &package_path)?;
// write out a file that indicates this folder has been initialized deno_core::unsync::spawn_blocking({
fs::write(initialized_file, "")?; let package_path = package_path.clone();
move || {
clone_dir_recursive(&cache_folder, &package_path)?;
// write out a file that indicates this folder has been initialized
fs::write(initialized_file, "")?;
Ok::<_, AnyError>(())
}
})
.await??;
if package.bin.is_some() { if package.bin.is_some() {
bin_entries_to_setup bin_entries_to_setup
@ -374,6 +383,7 @@ async fn sync_resolution_with_fs(
.join("node_modules"), .join("node_modules"),
&package.id.nv.name, &package.id.nv.name,
); );
clone_dir_recursive(&source_path, &package_path)?; clone_dir_recursive(&source_path, &package_path)?;
// write out a file that indicates this folder has been initialized // write out a file that indicates this folder has been initialized
fs::write(initialized_file, "")?; fs::write(initialized_file, "")?;