harper/harper-core/src/linting/mod.rs
Andrew Dunbar 86edc6e34f
feat: brandish→brand (#2325)
* feat: brandish→brand

* fix: sort `mod`s

* fix: exclude 3 pronouns; variation tests
2025-12-11 18:23:27 +00:00

516 lines
14 KiB
Rust

//! Frameworks and rules that locate errors in text.
//!
//! See the [`Linter`] trait and the [documentation for authoring a rule](https://writewithharper.com/docs/contributors/author-a-rule) for more information.
mod a_part;
mod a_while;
mod addicting;
mod adjective_double_degree;
mod adjective_of_a;
mod after_later;
mod all_intents_and_purposes;
mod allow_to;
mod am_in_the_morning;
mod amounts_for;
mod an_a;
mod and_in;
mod and_the_like;
mod another_thing_coming;
mod another_think_coming;
mod apart_from;
mod ask_no_preposition;
mod avoid_curses;
mod back_in_the_day;
mod be_allowed;
mod best_of_all_time;
mod boring_words;
mod bought;
mod brand_brandish;
mod call_them;
mod cant;
mod capitalize_personal_pronouns;
mod cautionary_tale;
mod change_tack;
mod chock_full;
mod closed_compounds;
mod comma_fixes;
mod compound_nouns;
mod compound_subject_i;
mod confident;
mod correct_number_suffix;
mod criteria_phenomena;
mod cure_for;
mod currency_placement;
mod dashes;
mod despite_of;
mod determiner_without_noun;
mod didnt;
mod discourse_markers;
mod disjoint_prefixes;
mod dot_initialisms;
mod double_click;
mod double_modal;
mod ellipsis_length;
mod else_possessive;
mod ever_every;
mod everyday;
mod expand_memory_shorthands;
mod expand_time_shorthands;
mod expr_linter;
mod far_be_it;
mod fascinated_by;
mod feel_fell;
mod few_units_of_time_ago;
mod filler_words;
mod find_fine;
mod first_aid_kit;
mod for_noun;
mod free_predicate;
mod friend_of_me;
mod go_so_far_as_to;
mod handful;
mod have_pronoun;
mod have_take_a_look;
mod hedging;
mod hello_greeting;
mod hereby;
mod hop_hope;
mod hope_youre;
mod how_to;
mod hyphenate_number_day;
mod i_am_agreement;
mod if_wouldve;
mod in_on_the_cards;
mod inflected_verb_after_to;
mod initialism_linter;
mod initialisms;
mod interested_in;
mod it_is;
mod it_looks_like_that;
mod it_would_be;
mod its_contraction;
mod its_possessive;
mod jealous_of;
mod johns_hopkins;
mod left_right_hand;
mod less_worse;
mod let_to_do;
mod lets_confusion;
mod likewise;
mod lint;
mod lint_group;
mod lint_kind;
mod long_sentences;
mod looking_forward_to;
mod map_phrase_linter;
mod map_phrase_set_linter;
mod mass_nouns;
mod merge_linters;
mod merge_words;
mod missing_preposition;
mod missing_space;
mod missing_to;
mod misspell;
mod mixed_bag;
mod modal_be_adjective;
mod modal_of;
mod modal_seem;
mod months;
mod more_better;
mod most_number;
mod most_of_the_times;
mod multiple_sequential_pronouns;
mod nail_on_the_head;
mod need_to_noun;
mod no_french_spaces;
mod no_match_for;
mod no_oxford_comma;
mod nobody;
mod nominal_wants;
mod noun_verb_confusion;
mod number_suffix_capitalization;
mod of_course;
mod on_floor;
mod once_or_twice;
mod one_and_the_same;
mod open_compounds;
mod open_the_light;
mod orthographic_consistency;
mod ought_to_be;
mod out_of_date;
mod oxford_comma;
mod oxymorons;
mod phrasal_verb_as_compound_noun;
mod phrase_corrections;
mod phrase_set_corrections;
mod pique_interest;
mod possessive_noun;
mod possessive_your;
mod progressive_needs_be;
mod pronoun_are;
mod pronoun_contraction;
mod pronoun_inflection_be;
mod pronoun_knew;
mod proper_noun_capitalization_linters;
mod quantifier_needs_of;
mod quantifier_numeral_conflict;
mod quite_quiet;
mod quote_spacing;
mod redundant_acronyms;
mod redundant_additive_adverbs;
mod regionalisms;
mod repeated_words;
mod respond;
mod right_click;
mod roller_skated;
mod safe_to_save;
mod save_to_safe;
mod semicolon_apostrophe;
mod sentence_capitalization;
mod shoot_oneself_in_the_foot;
mod simple_past_to_past_participle;
mod since_duration;
mod single_be;
mod some_without_article;
mod something_is;
mod somewhat_something;
mod soon_to_be;
mod sought_after;
mod spaces;
mod spell_check;
mod spelled_numbers;
mod split_words;
mod subject_pronoun;
mod suggestion;
mod take_medicine;
mod take_serious;
mod that_than;
mod that_which;
mod the_how_why;
mod the_my;
mod then_than;
mod theres;
mod theses_these;
mod thing_think;
mod though_thought;
mod throw_away;
mod throw_rubbish;
mod to_adverb;
mod to_two_too;
mod touristic;
mod unclosed_quotes;
mod update_place_names;
mod use_genitive;
mod use_title_case;
mod verb_to_adjective;
mod very_unique;
mod vice_versa;
mod was_aloud;
mod way_too_adjective;
mod well_educated;
mod whereas;
mod widely_accepted;
mod win_prize;
mod wish_could;
mod wordpress_dotcom;
mod would_never_have;
pub use expr_linter::ExprLinter;
pub use initialism_linter::InitialismLinter;
pub use lint::Lint;
pub use lint_group::{LintGroup, LintGroupConfig};
pub use lint_kind::LintKind;
pub use map_phrase_linter::MapPhraseLinter;
pub use map_phrase_set_linter::MapPhraseSetLinter;
pub use spell_check::SpellCheck;
pub use suggestion::Suggestion;
use crate::{Document, LSend, render_markdown};
/// A __stateless__ rule that searches documents for grammatical errors.
///
/// Commonly implemented via [`ExprLinter`].
///
/// See also: [`LintGroup`].
pub trait Linter: LSend {
/// Analyzes a document and produces zero or more [`Lint`]s.
///
/// An empty vector means the rule found nothing to report.
/// We pass `self` mutably for caching purposes.
fn lint(&mut self, document: &Document) -> Vec<Lint>;
/// A user-facing description of what kinds of grammatical errors this rule looks for.
/// It is usually shown in settings menus.
///
/// The text is interpreted as Markdown by [`HtmlDescriptionLinter::description_html`].
fn description(&self) -> &str;
}
/// A blanket-implemented trait that renders the Markdown description field of a linter to HTML.
pub trait HtmlDescriptionLinter {
/// Returns the linter's description rendered from Markdown to HTML.
fn description_html(&self) -> String;
}
/// Blanket implementation: every [`Linter`], sized or not, gets `description_html`.
impl<L> HtmlDescriptionLinter for L
where
    L: Linter + ?Sized,
{
    /// Feeds the linter's Markdown description through `render_markdown`.
    fn description_html(&self) -> String {
        render_markdown(self.description())
    }
}
#[cfg(test)]
pub mod tests {
use crate::parsers::Markdown;
use crate::{Document, Span, Token};
use hashbrown::HashSet;
/// Extension trait for converting spans of tokens back to their original text.
pub trait SpanVecExt {
/// Returns one string per span, holding the source text of the tokens of `doc` that the span
/// covers.
fn to_strings(&self, doc: &Document) -> Vec<String>;
}
impl SpanVecExt for Vec<Span<Token>> {
    /// Renders each token span as the concatenated text of the tokens it covers.
    ///
    /// Span bounds index into `doc.get_tokens()`; an out-of-range span panics on the slice.
    fn to_strings(&self, doc: &Document) -> Vec<String> {
        let mut rendered = Vec::with_capacity(self.len());

        for token_span in self {
            let text: String = doc.get_tokens()[token_span.start..token_span.end]
                .iter()
                .map(|token| doc.get_span_content_str(&token.span))
                .collect();
            rendered.push(text);
        }

        rendered
    }
}
use super::Linter;
use crate::spell::FstDictionary;
/// Asserts that `linter` produces no lints for `text`.
///
/// Shorthand for [`assert_lint_count`] with an expected count of zero.
#[track_caller]
pub fn assert_no_lints(text: &str, linter: impl Linter) {
assert_lint_count(text, linter, 0);
}
/// Asserts that `linter` reports exactly `count` lints for `text`.
///
/// The produced lints are always dumped with `dbg!` so they can be inspected in the test output.
/// A mismatch panics with a message that names the text, the expected count and the actual count.
#[track_caller]
pub fn assert_lint_count(text: &str, mut linter: impl Linter, count: usize) {
    let document = Document::new_markdown_default_curated(text);
    let lints = linter.lint(&document);
    dbg!(&lints);

    let produced = lints.len();
    assert!(
        produced == count,
        "Expected \"{text}\" to create {count} lints, but it created {}.",
        produced
    );
}
/// Asserts that the suggestions attached to all [`Lint`]s a [`Linter`] produces for `text` add up
/// to exactly `count`.
#[track_caller]
pub fn assert_suggestion_count(text: &str, mut linter: impl Linter, count: usize) {
    let document = Document::new_markdown_default_curated(text);

    let mut total = 0usize;
    for lint in linter.lint(&document) {
        total += lint.suggestions.len();
    }

    assert_eq!(total, count);
}
/// Runs a provided linter on text, applies the first suggestion from each lint
/// and asserts that the result is equal to a given value.
///
/// Shorthand for [`assert_nth_suggestion_result`] with `n` set to zero.
#[track_caller]
pub fn assert_suggestion_result(text: &str, linter: impl Linter, expected_result: &str) {
assert_nth_suggestion_result(text, linter, expected_result, 0);
}
/// Runs a provided linter on text, repeatedly applying suggestion number `n` of the first
/// reported lint, and asserts that the final text equals `expected_result`.
///
/// Also asserts that the corrected text no longer triggers the linter.
///
/// Note that `n` starts at zero.
#[track_caller]
pub fn assert_nth_suggestion_result(
    text: &str,
    mut linter: impl Linter,
    expected_result: &str,
    n: usize,
) {
    let transformed_str = transform_nth_str(text, &mut linter, n);

    assert!(
        transformed_str == expected_result,
        "Expected \"{expected_result}\"\n But got \"{transformed_str}\""
    );

    // Once every suggestion has been applied, the linter should have nothing left to report.
    assert_lint_count(&transformed_str, linter, 0);
}
/// Runs a provided linter on text three times, applying suggestion 0, 1 and 2 respectively, and
/// asserts that at least one of the three results equals `expected_result`.
///
/// The matching result must also be free of lints when run through the linter again.
///
/// # Panics
///
/// Panics when none of the three results equals `expected_result`, or when the matching result
/// still produces lints.
#[track_caller]
pub fn assert_top3_suggestion_result(
    text: &str,
    mut linter: impl Linter,
    expected_result: &str,
) {
    let zeroth = transform_nth_str(text, &mut linter, 0);
    let first = transform_nth_str(text, &mut linter, 1);
    let second = transform_nth_str(text, &mut linter, 2);

    // More than one candidate can match: `transform_nth_str` returns the text unchanged when the
    // requested suggestion index does not exist, and two suggestions may yield the same text.
    // All matching candidates are equal to `expected_result`, so checking the first one is
    // enough, and it guarantees the "fix leaves no lints" check is never skipped.
    let candidates = [&zeroth, &first, &second];
    let matched = candidates
        .iter()
        .copied()
        .find(|candidate| candidate.as_str() == expected_result);

    match matched {
        Some(fixed) => assert_lint_count(fixed, linter, 0),
        None => panic!(
            "None of the top 3 suggestions produced the expected result:\n\
            Expected: \"{expected_result}\"\n\
            Got:\n\
            [0]: \"{zeroth}\"\n\
            [1]: \"{first}\"\n\
            [2]: \"{second}\""
        ),
    }
}
/// Asserts that no single suggestion offered by the linter turns `text` into `bad_suggestion`.
///
/// Each suggestion of each lint is applied on its own to a fresh copy of `text`; the first one
/// whose result equals `bad_suggestion` triggers a panic naming its lint and suggestion index.
#[track_caller]
pub fn assert_not_in_suggestion_result(
    text: &str,
    mut linter: impl Linter,
    bad_suggestion: &str,
) {
    let document = Document::new_markdown_default_curated(text);
    let lints = linter.lint(&document);
    let source_chars: Vec<char> = text.chars().collect();

    for (i, lint) in lints.iter().enumerate() {
        for (j, suggestion) in lint.suggestions.iter().enumerate() {
            // Every suggestion starts from the untouched input.
            let mut candidate = source_chars.clone();
            suggestion.apply(lint.span, &mut candidate);
            let suggestion_text: String = candidate.iter().collect();

            assert!(
                suggestion_text != bad_suggestion,
                "Found undesired suggestion at lint[{i}].suggestions[{j}]:\n\
                Expected to not find suggestion: \"{bad_suggestion}\"\n\
                But found: \"{suggestion_text}\""
            );
        }
    }
}
/// Asserts both that every text in `good` is produced by some suggestion and that no suggestion
/// produces a text listed in `bad`.
///
/// Each suggestion of each lint is applied on its own to a fresh copy of `text`. Progress and a
/// summary are written to stderr.
///
/// # Panics
///
/// Panics when at least one bad suggestion was produced or at least one good suggestion was
/// never produced.
#[track_caller]
pub fn assert_good_and_bad_suggestions(
    text: &str,
    mut linter: impl Linter,
    good: &[&str],
    bad: &[&str],
) {
    let test = Document::new_markdown_default_curated(text);
    let lints = linter.lint(&test);

    let mut unseen_good: HashSet<_> = good.iter().copied().collect();
    let mut found_bad = Vec::new();
    let mut good_hits = 0usize;

    for (i, lint) in lints.into_iter().enumerate() {
        for (j, suggestion) in lint.suggestions.into_iter().enumerate() {
            let mut text_chars: Vec<char> = text.chars().collect();
            suggestion.apply(lint.span, &mut text_chars);
            let suggestion_text: String = text_chars.into_iter().collect();

            // The bad list wins: a text listed as both good and bad is reported as bad only.
            if bad.contains(&suggestion_text.as_str()) {
                eprintln!(
                    " ❌ Found bad suggestion at lint[{i}].suggestions[{j}]: \"{suggestion_text}\""
                );
                found_bad.push((i, j, suggestion_text));
            } else if good.contains(&suggestion_text.as_str()) {
                eprintln!(
                    " ✅ Found good suggestion at lint[{i}].suggestions[{j}]: \"{suggestion_text}\""
                );
                unseen_good.remove(suggestion_text.as_str());
                good_hits += 1;
            }
        }
    }

    if found_bad.is_empty() && unseen_good.is_empty() {
        eprintln!("\n✅ All {} good suggestions found, no bad suggestions\n", good_hits);
        return;
    }

    // From here on the assertion has failed; print the details before panicking.
    eprintln!("\n=== Test Summary ===");

    if !found_bad.is_empty() {
        eprintln!("\n❌ Found {} bad suggestions:", found_bad.len());
        for (i, j, text) in &found_bad {
            eprintln!(" - lint[{i}].suggestions[{j}]: \"{text}\"");
        }
    }

    if !unseen_good.is_empty() {
        eprintln!(
            "\n❌ Missing {} expected good suggestions:",
            unseen_good.len()
        );
        for text in &unseen_good {
            eprintln!(" - \"{text}\"");
        }
    }

    eprintln!("\n✅ Found {} good suggestions", good_hits);
    eprintln!("==================\n");

    panic!("Test failed - see error output above");
}
/// Repeatedly lints `text` and applies suggestion number `n` of the first reported lint.
///
/// Stops when the linter reports nothing, when the first lint has no suggestion at index `n`, or
/// after 100 applications. Returns the text as it stands at that point; if no suggestion could be
/// applied, that is the input unchanged.
fn transform_nth_str(text: &str, linter: &mut impl Linter, n: usize) -> String {
    let mut text_chars: Vec<char> = text.chars().collect();
    let mut iter_count = 0;

    // The cap keeps a linter whose suggestions never converge from looping forever.
    while iter_count < 100 {
        let document = Document::new_from_vec(
            text_chars.clone().into(),
            &Markdown::default(),
            &FstDictionary::curated(),
        );
        let lints = linter.lint(&document);

        let applicable = lints
            .first()
            .and_then(|lint| lint.suggestions.get(n).map(|sug| (lint, sug)));
        let (lint, sug) = match applicable {
            Some(pair) => pair,
            None => break,
        };

        sug.apply(lint.span, &mut text_chars);
        let transformed_str: String = text_chars.iter().collect();
        dbg!(transformed_str);

        iter_count += 1;
    }

    eprintln!("Corrected {iter_count} times.");
    text_chars.iter().collect()
}
}