feat(cli): use Rayon when linting many items (#2244)

This commit is contained in:
Elijah Potter 2025-11-26 10:17:58 -07:00 committed by GitHub
parent 2a954e7014
commit a5539e4a48
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 68 additions and 34 deletions

1
Cargo.lock generated
View file

@ -2683,6 +2683,7 @@ dependencies = [
"harper-stats",
"harper-typst",
"hashbrown 0.16.1",
"rayon",
"serde",
"serde_json",
"strum",

View file

@ -18,6 +18,7 @@ harper-pos-utils = { path = "../harper-pos-utils", version = "0.73.0", features
harper-comments = { path = "../harper-comments", version = "0.73.0" }
harper-typst = { path = "../harper-typst", version = "0.73.0" }
hashbrown = "0.16.1"
rayon = "1.11.0"
serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0.145"
strum = "0.27.2"

View file

@ -4,8 +4,8 @@ use std::sync::Arc;
use std::{fs, process};
use ariadne::{Color, Fmt, Label, Report, ReportKind, Source};
use either::Either;
use hashbrown::HashMap;
use rayon::prelude::*;
use harper_core::{
linting::{Lint, LintGroup, LintGroupConfig, LintKind},
@ -59,6 +59,12 @@ struct InputInfo<'a> {
input: Input,
}
/// One unit of linting work: a single input together with the context needed to lint it.
///
/// `lint` collects these into a list first and then maps each job through
/// `lint_one_input`, using a Rayon parallel iterator when there is more than one job.
struct InputJob {
/// `true` when the job was produced from an `Input::Dir` user input, `false` otherwise.
/// Passed through to `lint_one_input`.
batch_mode: bool,
/// Identifier of the user input this job was expanded from when in batch mode;
/// an empty string otherwise.
parent_input_id: String,
/// The input to lint: an `Input::File` for an entry found in a directory, or a clone
/// of the user's own input when it was not a directory that could be read.
input: Input,
}
/// Exposes a `String` rendering of a value's path.
// NOTE(review): implementors and the exact output format are not visible in this hunk;
// confirm against the impls before relying on a particular format.
trait InputPath {
/// Returns the path formatted as an owned `String`.
fn format_path(&self) -> String;
}
@ -151,35 +157,53 @@ pub fn lint(
false => ReportStyle::FullAriadneLintReport,
};
let mut input_jobs = Vec::new();
for user_input in all_user_inputs {
let (batch_mode, maybe_dir) = match &user_input {
Input::Dir(dir) => (true, std::fs::read_dir(dir).ok()),
_ => (false, None),
};
// All the files within this input if it's a Dir, or just this input otherwise.
let inputs = if let Some(dir) = maybe_dir {
let mut entries: Vec<_> = dir
.filter_map(Result::ok)
.filter(|entry| entry.file_type().map(|ft| !ft.is_dir()).unwrap_or(false))
.collect();
// Sort entries by file name
entries.sort_by_key(|entry| entry.file_name());
Either::Left(entries.into_iter().map(|entry| Input::File(entry.path())))
} else {
Either::Right(std::iter::once(user_input.clone()))
};
let parent_input_id = if batch_mode {
user_input.get_identifier().to_string()
} else {
String::new()
};
for current_input in inputs {
let lint_results = lint_one_input(
if let Some(dir) = maybe_dir {
let mut entries: Vec<_> = dir
.filter_map(Result::ok)
.filter(|entry| entry.file_type().map(|ft| !ft.is_dir()).unwrap_or(false))
.collect();
entries.sort_by_key(|entry| entry.file_name());
for entry in entries {
input_jobs.push(InputJob {
batch_mode,
parent_input_id: parent_input_id.clone(),
input: Input::File(entry.path()),
});
}
} else {
input_jobs.push(InputJob {
batch_mode,
parent_input_id,
input: user_input.clone(),
});
}
}
let per_input_results = {
let run_job = |job: InputJob| {
let InputJob {
batch_mode,
parent_input_id,
input,
} = job;
let parent_id_ref = parent_input_id.as_str();
lint_one_input(
// Common properties of harper-cli
markdown_options,
&curated_plus_user_dict,
@ -196,24 +220,32 @@ pub fn lint(
batch_mode,
// The current input to be linted
InputInfo {
parent_input_id: &parent_input_id,
input: current_input,
parent_input_id: parent_id_ref,
input,
},
);
)
};
// Update the global stats
for (kind, count) in lint_results.0 {
*all_lint_kinds.entry(kind).or_insert(0) += count;
}
for (rule, count) in lint_results.1 {
*all_rules.entry(rule).or_insert(0) += count;
}
for ((kind, rule), count) in lint_results.2 {
*all_lint_kind_rule_pairs.entry((kind, rule)).or_insert(0) += count;
}
for (word, count) in lint_results.3 {
*all_spellos.entry(word).or_insert(0) += count;
}
if input_jobs.len() > 1 {
input_jobs.into_par_iter().map(run_job).collect::<Vec<_>>()
} else {
input_jobs.into_iter().map(run_job).collect::<Vec<_>>()
}
};
for lint_results in per_input_results {
// Update the global stats
for (kind, count) in lint_results.0 {
*all_lint_kinds.entry(kind).or_insert(0) += count;
}
for (rule, count) in lint_results.1 {
*all_rules.entry(rule).or_insert(0) += count;
}
for ((kind, rule), count) in lint_results.2 {
*all_lint_kind_rule_pairs.entry((kind, rule)).or_insert(0) += count;
}
for (word, count) in lint_results.3 {
*all_spellos.entry(word).or_insert(0) += count;
}
}