feat(cli): use Rayon when linting many items (#2244)

2025-12-23 08:48:15 +00:00 · 2025-11-26 10:17:58 -07:00 · 2025-11-26 10:17:58 -07:00 · a5539e4a48
commit a5539e4a48
parent 2a954e7014
3 changed files with 68 additions and 34 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -2683,6 +2683,7 @@ dependencies = [
 "harper-stats",
 "harper-typst",
 "hashbrown 0.16.1",
+ "rayon",
 "serde",
 "serde_json",
 "strum",
--- a/harper-cli/Cargo.toml
+++ b/harper-cli/Cargo.toml
@ -18,6 +18,7 @@ harper-pos-utils = { path = "../harper-pos-utils", version = "0.73.0", features
 harper-comments = { path = "../harper-comments", version = "0.73.0" }
 harper-typst = { path = "../harper-typst", version = "0.73.0" }
 hashbrown = "0.16.1"
+rayon = "1.11.0"
 serde = { version = "1.0.228", features = ["derive"] }
 serde_json = "1.0.145"
 strum = "0.27.2"
--- a/harper-cli/src/lint.rs
+++ b/harper-cli/src/lint.rs
@ -4,8 +4,8 @@ use std::sync::Arc;
 use std::{fs, process};

 use ariadne::{Color, Fmt, Label, Report, ReportKind, Source};
-use either::Either;
 use hashbrown::HashMap;
+use rayon::prelude::*;

 use harper_core::{
    linting::{Lint, LintGroup, LintGroupConfig, LintKind},
@ -59,6 +59,12 @@ struct InputInfo<'a> {
    input: Input,
 }

+struct InputJob { // One queued unit of lint work: a single input plus the per-input context handed to `lint_one_input`.
+    batch_mode: bool, // True when the originating user input was an `Input::Dir`.
+    parent_input_id: String, // Identifier of the originating directory input in batch mode; empty otherwise. Owned, presumably so jobs can be moved into the parallel iterator.
+    input: Input, // The input to lint: one `Input::File` per directory entry, or a clone of the user input itself.
+}
+
 trait InputPath {
    fn format_path(&self) -> String;
 }
@ -151,35 +157,53 @@ pub fn lint(
        false => ReportStyle::FullAriadneLintReport,
    };

+    let mut input_jobs = Vec::new();
    for user_input in all_user_inputs {
        let (batch_mode, maybe_dir) = match &user_input {
            Input::Dir(dir) => (true, std::fs::read_dir(dir).ok()),
            _ => (false, None),
        };

-        // All the files within this input if it's a Dir, or just this input otherwise.
-        let inputs = if let Some(dir) = maybe_dir {
-            let mut entries: Vec<_> = dir
-                .filter_map(Result::ok)
-                .filter(|entry| entry.file_type().map(|ft| !ft.is_dir()).unwrap_or(false))
-                .collect();
-
-            // Sort entries by file name
-            entries.sort_by_key(|entry| entry.file_name());
-
-            Either::Left(entries.into_iter().map(|entry| Input::File(entry.path())))
-        } else {
-            Either::Right(std::iter::once(user_input.clone()))
-        };
-
        let parent_input_id = if batch_mode {
            user_input.get_identifier().to_string()
        } else {
            String::new()
        };

-        for current_input in inputs {
-            let lint_results = lint_one_input(
+        if let Some(dir) = maybe_dir {
+            let mut entries: Vec<_> = dir
+                .filter_map(Result::ok)
+                .filter(|entry| entry.file_type().map(|ft| !ft.is_dir()).unwrap_or(false))
+                .collect();
+
+            entries.sort_by_key(|entry| entry.file_name());
+
+            for entry in entries {
+                input_jobs.push(InputJob {
+                    batch_mode,
+                    parent_input_id: parent_input_id.clone(),
+                    input: Input::File(entry.path()),
+                });
+            }
+        } else {
+            input_jobs.push(InputJob {
+                batch_mode,
+                parent_input_id,
+                input: user_input.clone(),
+            });
+        }
+    }
+
+    let per_input_results = {
+        let run_job = |job: InputJob| {
+            let InputJob {
+                batch_mode,
+                parent_input_id,
+                input,
+            } = job;
+            let parent_id_ref = parent_input_id.as_str();
+
+            lint_one_input(
                // Common properties of harper-cli
                markdown_options,
                &curated_plus_user_dict,
@ -196,24 +220,32 @@ pub fn lint(
                batch_mode,
                // The current input to be linted
                InputInfo {
-                    parent_input_id: &parent_input_id,
-                    input: current_input,
+                    parent_input_id: parent_id_ref,
+                    input,
                },
-            );
+            )
+        };

-            // Update the global stats
-            for (kind, count) in lint_results.0 {
-                *all_lint_kinds.entry(kind).or_insert(0) += count;
-            }
-            for (rule, count) in lint_results.1 {
-                *all_rules.entry(rule).or_insert(0) += count;
-            }
-            for ((kind, rule), count) in lint_results.2 {
-                *all_lint_kind_rule_pairs.entry((kind, rule)).or_insert(0) += count;
-            }
-            for (word, count) in lint_results.3 {
-                *all_spellos.entry(word).or_insert(0) += count;
-            }
+        if input_jobs.len() > 1 {
+            input_jobs.into_par_iter().map(run_job).collect::<Vec<_>>()
+        } else {
+            input_jobs.into_iter().map(run_job).collect::<Vec<_>>()
+        }
+    };
+
+    for lint_results in per_input_results {
+        // Update the global stats
+        for (kind, count) in lint_results.0 {
+            *all_lint_kinds.entry(kind).or_insert(0) += count;
+        }
+        for (rule, count) in lint_results.1 {
+            *all_rules.entry(rule).or_insert(0) += count;
+        }
+        for ((kind, rule), count) in lint_results.2 {
+            *all_lint_kind_rule_pairs.entry((kind, rule)).or_insert(0) += count;
+        }
+        for (word, count) in lint_results.3 {
+            *all_spellos.entry(word).or_insert(0) += count;
        }
    }