mirror of
https://github.com/Automattic/harper.git
synced 2025-12-23 08:48:15 +00:00
516 lines
14 KiB
Rust
516 lines
14 KiB
Rust
//! Frameworks and rules that locate errors in text.
|
|
//!
|
|
//! See the [`Linter`] trait and the [documentation for authoring a rule](https://writewithharper.com/docs/contributors/author-a-rule) for more information.
|
|
|
|
mod a_part;
|
|
mod a_while;
|
|
mod addicting;
|
|
mod adjective_double_degree;
|
|
mod adjective_of_a;
|
|
mod after_later;
|
|
mod all_intents_and_purposes;
|
|
mod allow_to;
|
|
mod am_in_the_morning;
|
|
mod amounts_for;
|
|
mod an_a;
|
|
mod and_in;
|
|
mod and_the_like;
|
|
mod another_thing_coming;
|
|
mod another_think_coming;
|
|
mod apart_from;
|
|
mod ask_no_preposition;
|
|
mod avoid_curses;
|
|
mod back_in_the_day;
|
|
mod be_allowed;
|
|
mod best_of_all_time;
|
|
mod boring_words;
|
|
mod bought;
|
|
mod brand_brandish;
|
|
mod call_them;
|
|
mod cant;
|
|
mod capitalize_personal_pronouns;
|
|
mod cautionary_tale;
|
|
mod change_tack;
|
|
mod chock_full;
|
|
mod closed_compounds;
|
|
mod comma_fixes;
|
|
mod compound_nouns;
|
|
mod compound_subject_i;
|
|
mod confident;
|
|
mod correct_number_suffix;
|
|
mod criteria_phenomena;
|
|
mod cure_for;
|
|
mod currency_placement;
|
|
mod dashes;
|
|
mod despite_of;
|
|
mod determiner_without_noun;
|
|
mod didnt;
|
|
mod discourse_markers;
|
|
mod disjoint_prefixes;
|
|
mod dot_initialisms;
|
|
mod double_click;
|
|
mod double_modal;
|
|
mod ellipsis_length;
|
|
mod else_possessive;
|
|
mod ever_every;
|
|
mod everyday;
|
|
mod expand_memory_shorthands;
|
|
mod expand_time_shorthands;
|
|
mod expr_linter;
|
|
mod far_be_it;
|
|
mod fascinated_by;
|
|
mod feel_fell;
|
|
mod few_units_of_time_ago;
|
|
mod filler_words;
|
|
mod find_fine;
|
|
mod first_aid_kit;
|
|
mod for_noun;
|
|
mod free_predicate;
|
|
mod friend_of_me;
|
|
mod go_so_far_as_to;
|
|
mod handful;
|
|
mod have_pronoun;
|
|
mod have_take_a_look;
|
|
mod hedging;
|
|
mod hello_greeting;
|
|
mod hereby;
|
|
mod hop_hope;
|
|
mod hope_youre;
|
|
mod how_to;
|
|
mod hyphenate_number_day;
|
|
mod i_am_agreement;
|
|
mod if_wouldve;
|
|
mod in_on_the_cards;
|
|
mod inflected_verb_after_to;
|
|
mod initialism_linter;
|
|
mod initialisms;
|
|
mod interested_in;
|
|
mod it_is;
|
|
mod it_looks_like_that;
|
|
mod it_would_be;
|
|
mod its_contraction;
|
|
mod its_possessive;
|
|
mod jealous_of;
|
|
mod johns_hopkins;
|
|
mod left_right_hand;
|
|
mod less_worse;
|
|
mod let_to_do;
|
|
mod lets_confusion;
|
|
mod likewise;
|
|
mod lint;
|
|
mod lint_group;
|
|
mod lint_kind;
|
|
mod long_sentences;
|
|
mod looking_forward_to;
|
|
mod map_phrase_linter;
|
|
mod map_phrase_set_linter;
|
|
mod mass_nouns;
|
|
mod merge_linters;
|
|
mod merge_words;
|
|
mod missing_preposition;
|
|
mod missing_space;
|
|
mod missing_to;
|
|
mod misspell;
|
|
mod mixed_bag;
|
|
mod modal_be_adjective;
|
|
mod modal_of;
|
|
mod modal_seem;
|
|
mod months;
|
|
mod more_better;
|
|
mod most_number;
|
|
mod most_of_the_times;
|
|
mod multiple_sequential_pronouns;
|
|
mod nail_on_the_head;
|
|
mod need_to_noun;
|
|
mod no_french_spaces;
|
|
mod no_match_for;
|
|
mod no_oxford_comma;
|
|
mod nobody;
|
|
mod nominal_wants;
|
|
mod noun_verb_confusion;
|
|
mod number_suffix_capitalization;
|
|
mod of_course;
|
|
mod on_floor;
|
|
mod once_or_twice;
|
|
mod one_and_the_same;
|
|
mod open_compounds;
|
|
mod open_the_light;
|
|
mod orthographic_consistency;
|
|
mod ought_to_be;
|
|
mod out_of_date;
|
|
mod oxford_comma;
|
|
mod oxymorons;
|
|
mod phrasal_verb_as_compound_noun;
|
|
mod phrase_corrections;
|
|
mod phrase_set_corrections;
|
|
mod pique_interest;
|
|
mod possessive_noun;
|
|
mod possessive_your;
|
|
mod progressive_needs_be;
|
|
mod pronoun_are;
|
|
mod pronoun_contraction;
|
|
mod pronoun_inflection_be;
|
|
mod pronoun_knew;
|
|
mod proper_noun_capitalization_linters;
|
|
mod quantifier_needs_of;
|
|
mod quantifier_numeral_conflict;
|
|
mod quite_quiet;
|
|
mod quote_spacing;
|
|
mod redundant_acronyms;
|
|
mod redundant_additive_adverbs;
|
|
mod regionalisms;
|
|
mod repeated_words;
|
|
mod respond;
|
|
mod right_click;
|
|
mod roller_skated;
|
|
mod safe_to_save;
|
|
mod save_to_safe;
|
|
mod semicolon_apostrophe;
|
|
mod sentence_capitalization;
|
|
mod shoot_oneself_in_the_foot;
|
|
mod simple_past_to_past_participle;
|
|
mod since_duration;
|
|
mod single_be;
|
|
mod some_without_article;
|
|
mod something_is;
|
|
mod somewhat_something;
|
|
mod soon_to_be;
|
|
mod sought_after;
|
|
mod spaces;
|
|
mod spell_check;
|
|
mod spelled_numbers;
|
|
mod split_words;
|
|
mod subject_pronoun;
|
|
mod suggestion;
|
|
mod take_medicine;
|
|
mod take_serious;
|
|
mod that_than;
|
|
mod that_which;
|
|
mod the_how_why;
|
|
mod the_my;
|
|
mod then_than;
|
|
mod theres;
|
|
mod theses_these;
|
|
mod thing_think;
|
|
mod though_thought;
|
|
mod throw_away;
|
|
mod throw_rubbish;
|
|
mod to_adverb;
|
|
mod to_two_too;
|
|
mod touristic;
|
|
mod unclosed_quotes;
|
|
mod update_place_names;
|
|
mod use_genitive;
|
|
mod use_title_case;
|
|
mod verb_to_adjective;
|
|
mod very_unique;
|
|
mod vice_versa;
|
|
mod was_aloud;
|
|
mod way_too_adjective;
|
|
mod well_educated;
|
|
mod whereas;
|
|
mod widely_accepted;
|
|
mod win_prize;
|
|
mod wish_could;
|
|
mod wordpress_dotcom;
|
|
mod would_never_have;
|
|
|
|
pub use expr_linter::ExprLinter;
|
|
pub use initialism_linter::InitialismLinter;
|
|
pub use lint::Lint;
|
|
pub use lint_group::{LintGroup, LintGroupConfig};
|
|
pub use lint_kind::LintKind;
|
|
pub use map_phrase_linter::MapPhraseLinter;
|
|
pub use map_phrase_set_linter::MapPhraseSetLinter;
|
|
pub use spell_check::SpellCheck;
|
|
pub use suggestion::Suggestion;
|
|
|
|
use crate::{Document, LSend, render_markdown};
|
|
|
|
/// A __stateless__ rule that searches documents for grammatical errors.
|
|
///
|
|
/// Commonly implemented via [`ExprLinter`].
|
|
///
|
|
/// See also: [`LintGroup`].
|
|
pub trait Linter: LSend {
|
|
/// Analyzes a document and produces zero or more [`Lint`]s.
|
|
/// We pass `self` mutably for caching purposes.
|
|
fn lint(&mut self, document: &Document) -> Vec<Lint>;
|
|
/// A user-facing description of what kinds of grammatical errors this rule looks for.
|
|
/// It is usually shown in settings menus.
|
|
fn description(&self) -> &str;
|
|
}
|
|
|
|
/// A blanket-implemented trait that renders the Markdown description field of a linter to HTML.
|
|
pub trait HtmlDescriptionLinter {
|
|
fn description_html(&self) -> String;
|
|
}
|
|
|
|
impl<L: ?Sized> HtmlDescriptionLinter for L
|
|
where
|
|
L: Linter,
|
|
{
|
|
fn description_html(&self) -> String {
|
|
let desc = self.description();
|
|
render_markdown(desc)
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
pub mod tests {
|
|
use crate::parsers::Markdown;
|
|
use crate::{Document, Span, Token};
|
|
use hashbrown::HashSet;
|
|
|
|
/// Extension trait for converting spans of tokens back to their original text
|
|
pub trait SpanVecExt {
|
|
fn to_strings(&self, doc: &Document) -> Vec<String>;
|
|
}
|
|
|
|
impl SpanVecExt for Vec<Span<Token>> {
|
|
fn to_strings(&self, doc: &Document) -> Vec<String> {
|
|
self.iter()
|
|
.map(|sp| {
|
|
doc.get_tokens()[sp.start..sp.end]
|
|
.iter()
|
|
.map(|tok| doc.get_span_content_str(&tok.span))
|
|
.collect::<String>()
|
|
})
|
|
.collect()
|
|
}
|
|
}
|
|
|
|
use super::Linter;
|
|
use crate::spell::FstDictionary;
|
|
|
|
#[track_caller]
|
|
pub fn assert_no_lints(text: &str, linter: impl Linter) {
|
|
assert_lint_count(text, linter, 0);
|
|
}
|
|
|
|
#[track_caller]
|
|
pub fn assert_lint_count(text: &str, mut linter: impl Linter, count: usize) {
|
|
let test = Document::new_markdown_default_curated(text);
|
|
let lints = linter.lint(&test);
|
|
dbg!(&lints);
|
|
if lints.len() != count {
|
|
panic!(
|
|
"Expected \"{text}\" to create {count} lints, but it created {}.",
|
|
lints.len()
|
|
);
|
|
}
|
|
}
|
|
|
|
/// Assert the total number of suggestions produced by a [`Linter`], spread across all produced
|
|
/// [`Lint`]s.
|
|
#[track_caller]
|
|
pub fn assert_suggestion_count(text: &str, mut linter: impl Linter, count: usize) {
|
|
let test = Document::new_markdown_default_curated(text);
|
|
let lints = linter.lint(&test);
|
|
assert_eq!(
|
|
lints.iter().map(|l| l.suggestions.len()).sum::<usize>(),
|
|
count
|
|
);
|
|
}
|
|
|
|
/// Runs a provided linter on text, applies the first suggestion from each lint
|
|
/// and asserts whether the result is equal to a given value.
|
|
#[track_caller]
|
|
pub fn assert_suggestion_result(text: &str, linter: impl Linter, expected_result: &str) {
|
|
assert_nth_suggestion_result(text, linter, expected_result, 0);
|
|
}
|
|
|
|
/// Runs a provided linter on text, applies the nth suggestion from each lint
|
|
/// and asserts whether the result is equal to a given value.
|
|
///
|
|
/// Note that `n` starts at zero.
|
|
#[track_caller]
|
|
pub fn assert_nth_suggestion_result(
|
|
text: &str,
|
|
mut linter: impl Linter,
|
|
expected_result: &str,
|
|
n: usize,
|
|
) {
|
|
let transformed_str = transform_nth_str(text, &mut linter, n);
|
|
|
|
if transformed_str.as_str() != expected_result {
|
|
panic!("Expected \"{expected_result}\"\n But got \"{transformed_str}\"");
|
|
}
|
|
|
|
// Applying the suggestions should fix all the lints.
|
|
assert_lint_count(&transformed_str, linter, 0);
|
|
}
|
|
|
|
#[track_caller]
|
|
pub fn assert_top3_suggestion_result(
|
|
text: &str,
|
|
mut linter: impl Linter,
|
|
expected_result: &str,
|
|
) {
|
|
let zeroth = transform_nth_str(text, &mut linter, 0);
|
|
let first = transform_nth_str(text, &mut linter, 1);
|
|
let second = transform_nth_str(text, &mut linter, 2);
|
|
|
|
match (
|
|
zeroth.as_str() == expected_result,
|
|
first.as_str() == expected_result,
|
|
second.as_str() == expected_result,
|
|
) {
|
|
(true, false, false) => assert_lint_count(&zeroth, linter, 0),
|
|
(false, true, false) => assert_lint_count(&first, linter, 0),
|
|
(false, false, true) => assert_lint_count(&second, linter, 0),
|
|
(false, false, false) => panic!(
|
|
"None of the top 3 suggestions produced the expected result:\n\
|
|
Expected: \"{expected_result}\"\n\
|
|
Got:\n\
|
|
[0]: \"{zeroth}\"\n\
|
|
[1]: \"{first}\"\n\
|
|
[2]: \"{second}\""
|
|
),
|
|
// I think it's not possible for more than one suggestion to be correct
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
/// Asserts that none of the suggestions from the linter match the given text.
|
|
#[track_caller]
|
|
pub fn assert_not_in_suggestion_result(
|
|
text: &str,
|
|
mut linter: impl Linter,
|
|
bad_suggestion: &str,
|
|
) {
|
|
let test = Document::new_markdown_default_curated(text);
|
|
let lints = linter.lint(&test);
|
|
|
|
for (i, lint) in lints.iter().enumerate() {
|
|
for (j, suggestion) in lint.suggestions.iter().enumerate() {
|
|
let mut text_chars: Vec<char> = text.chars().collect();
|
|
suggestion.apply(lint.span, &mut text_chars);
|
|
let suggestion_text: String = text_chars.into_iter().collect();
|
|
|
|
if suggestion_text == bad_suggestion {
|
|
panic!(
|
|
"Found undesired suggestion at lint[{i}].suggestions[{j}]:\n\
|
|
Expected to not find suggestion: \"{bad_suggestion}\"\n\
|
|
But found: \"{suggestion_text}\""
|
|
);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Asserts both that the given text matches the expected good suggestions and that none of the
|
|
/// suggestions are in the bad suggestions list.
|
|
#[track_caller]
|
|
pub fn assert_good_and_bad_suggestions(
|
|
text: &str,
|
|
mut linter: impl Linter,
|
|
good: &[&str],
|
|
bad: &[&str],
|
|
) {
|
|
let test = Document::new_markdown_default_curated(text);
|
|
let lints = linter.lint(&test);
|
|
|
|
let mut unseen_good: HashSet<_> = good.iter().cloned().collect();
|
|
let mut found_bad = Vec::new();
|
|
let mut found_good = Vec::new();
|
|
|
|
for (i, lint) in lints.into_iter().enumerate() {
|
|
for (j, suggestion) in lint.suggestions.into_iter().enumerate() {
|
|
let mut text_chars: Vec<char> = text.chars().collect();
|
|
suggestion.apply(lint.span, &mut text_chars);
|
|
let suggestion_text: String = text_chars.into_iter().collect();
|
|
|
|
// Check for bad suggestions
|
|
if bad.contains(&&*suggestion_text) {
|
|
found_bad.push((i, j, suggestion_text.clone()));
|
|
eprintln!(
|
|
" ❌ Found bad suggestion at lint[{i}].suggestions[{j}]: \"{suggestion_text}\""
|
|
);
|
|
}
|
|
// Check for good suggestions
|
|
else if good.contains(&&*suggestion_text) {
|
|
found_good.push((i, j, suggestion_text.clone()));
|
|
eprintln!(
|
|
" ✅ Found good suggestion at lint[{i}].suggestions[{j}]: \"{suggestion_text}\""
|
|
);
|
|
unseen_good.remove(suggestion_text.as_str());
|
|
}
|
|
}
|
|
}
|
|
|
|
// Print summary
|
|
if !found_bad.is_empty() || !unseen_good.is_empty() {
|
|
eprintln!("\n=== Test Summary ===");
|
|
|
|
// In the summary section, change these loops:
|
|
if !found_bad.is_empty() {
|
|
eprintln!("\n❌ Found {} bad suggestions:", found_bad.len());
|
|
for (i, j, text) in &found_bad {
|
|
eprintln!(" - lint[{i}].suggestions[{j}]: \"{text}\"");
|
|
}
|
|
}
|
|
|
|
// And for the good suggestions:
|
|
if !unseen_good.is_empty() {
|
|
eprintln!(
|
|
"\n❌ Missing {} expected good suggestions:",
|
|
unseen_good.len()
|
|
);
|
|
for text in &unseen_good {
|
|
eprintln!(" - \"{text}\"");
|
|
}
|
|
}
|
|
|
|
eprintln!("\n✅ Found {} good suggestions", found_good.len());
|
|
eprintln!("==================\n");
|
|
|
|
if !found_bad.is_empty() || !unseen_good.is_empty() {
|
|
panic!("Test failed - see error output above");
|
|
}
|
|
} else {
|
|
eprintln!(
|
|
"\n✅ All {} good suggestions found, no bad suggestions\n",
|
|
found_good.len()
|
|
);
|
|
}
|
|
}
|
|
|
|
fn transform_nth_str(text: &str, linter: &mut impl Linter, n: usize) -> String {
|
|
let mut text_chars: Vec<char> = text.chars().collect();
|
|
|
|
let mut iter_count = 0;
|
|
|
|
loop {
|
|
let test = Document::new_from_vec(
|
|
text_chars.clone().into(),
|
|
&Markdown::default(),
|
|
&FstDictionary::curated(),
|
|
);
|
|
let lints = linter.lint(&test);
|
|
|
|
if let Some(lint) = lints.first() {
|
|
if let Some(sug) = lint.suggestions.get(n) {
|
|
sug.apply(lint.span, &mut text_chars);
|
|
|
|
let transformed_str: String = text_chars.iter().collect();
|
|
dbg!(transformed_str);
|
|
} else {
|
|
break;
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
|
|
iter_count += 1;
|
|
|
|
if iter_count == 100 {
|
|
break;
|
|
}
|
|
}
|
|
|
|
eprintln!("Corrected {iter_count} times.");
|
|
|
|
text_chars.iter().collect()
|
|
}
|
|
}
|