From 69db96b370fabdaa2c3068b201ec7b6a41b8118c Mon Sep 17 00:00:00 2001 From: Andrew Dunbar Date: Wed, 10 Dec 2025 21:52:07 +0000 Subject: [PATCH] chore: add weigh/weight to noun/verb confusion (#2315) * chore: add weigh/weight to noun/verb confusion * fix: -debug printf --- harper-core/dictionary.dict | 2 +- harper-core/src/expr/sequence_expr.rs | 1 + harper-core/src/linting/mod.rs | 2 +- .../src/linting/noun_verb_confusion/mod.rs | 28 ++++++++ .../noun_instead_of_verb/general.rs | 69 ++++++++----------- 5 files changed, 58 insertions(+), 44 deletions(-) diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index 9d87f847..5ba8e549 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -43701,7 +43701,7 @@ seismologist/NgS seismology/Nmg seize/~VGdSU seizure/~NgSV -seldom/~R # removed `J` adjective sense is archaic +seldom/~R8 # removed `J` adjective sense is archaic select/~JVSGdev selection/~NwSg selective/~JY diff --git a/harper-core/src/expr/sequence_expr.rs b/harper-core/src/expr/sequence_expr.rs index e0b81b0b..cdfd9236 100644 --- a/harper-core/src/expr/sequence_expr.rs +++ b/harper-core/src/expr/sequence_expr.rs @@ -531,6 +531,7 @@ impl SequenceExpr { // Adverbs gen_then_from_is!(adverb); + gen_then_from_is!(frequency_adverb); // Determiners diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index e882cd7d..f7bca37a 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -332,7 +332,7 @@ pub mod tests { let transformed_str = transform_nth_str(text, &mut linter, n); if transformed_str.as_str() != expected_result { - panic!("Expected \"{expected_result}\"\n But got \"{transformed_str}\""); + panic!("Expected \"{expected_result}\"\n But got \"{transformed_str}\""); } // Applying the suggestions should fix all the lints. diff --git a/harper-core/src/linting/noun_verb_confusion/mod.rs b/harper-core/src/linting/noun_verb_confusion/mod.rs index 8685d0ad..95d57b5a 100644 --- a/harper-core/src/linting/noun_verb_confusion/mod.rs +++ b/harper-core/src/linting/noun_verb_confusion/mod.rs @@ -14,6 +14,7 @@ pub(crate) const NOUN_VERB_PAIRS: &[(&str, &str)] = &[ ("emphasis", "emphasize"), // TODO how to handle "emphasise" as well as "emphasize"? ("intent", "intend"), // ("proof", "prove"), // "Proof" is also a verb, a synonym of "proofread". + ("weight", "weigh"), // Add more pairs here as needed ]; @@ -1370,4 +1371,31 @@ mod tests { "Let me give you a piece of advice.", ); } + + #[test] + fn fix_helps_you_weight() { + assert_suggestion_result( + "An iOS app that helps you weight small things on the screen of your iPhone / iPad.", + NounVerbConfusion::default(), + "An iOS app that helps you weigh small things on the screen of your iPhone / iPad.", + ); + } + + #[test] + fn fix_do_you_weight() { + assert_suggestion_result( + "How much do you weight?", + NounVerbConfusion::default(), + "How much do you weigh?", + ); + } + + #[test] + fn fix_more_than_you_weight() { + assert_suggestion_result( + "contributed more than you weight", + NounVerbConfusion::default(), + "contributed more than you weigh", + ); + } } diff --git a/harper-core/src/linting/noun_verb_confusion/noun_instead_of_verb/general.rs b/harper-core/src/linting/noun_verb_confusion/noun_instead_of_verb/general.rs index 43428c46..79da7b4d 100644 --- a/harper-core/src/linting/noun_verb_confusion/noun_instead_of_verb/general.rs +++ b/harper-core/src/linting/noun_verb_confusion/noun_instead_of_verb/general.rs @@ -1,43 +1,15 @@ -use crate::expr::{Expr, FirstMatchOf, LongestMatchOf, SequenceExpr}; -use crate::linting::expr_linter::Chunk; -use crate::linting::{ExprLinter, Lint, LintKind, Suggestion}; -use crate::patterns::Word; -use crate::{CharStringExt, Lrc, Token, patterns::WordSet}; +use crate::{ + CharStringExt, Lrc, Token, + expr::{Expr, FirstMatchOf, LongestMatchOf, SequenceExpr}, + linting::{ExprLinter, Lint, LintKind, Suggestion, expr_linter::Chunk}, + patterns::{ModalVerb, Word, WordSet}, +}; use super::super::NOUN_VERB_PAIRS; /// Pronouns that can come before verbs but not nouns const PRONOUNS: &[&str] = &["he", "I", "it", "she", "they", "we", "who", "you"]; -/// Adverbs that can come before verbs but not nouns -/// Note: "Sometimes" can come before a noun. -const ADVERBS: &[&str] = &["always", "never", "often", "seldom"]; - -/// Modal verbs that can come before other verbs but not nouns -const MODAL_VERBS_ETC: &[&str] = &[ - "can", - "cannot", - "can't", - "could", - "couldn't", - "may", - "might", - "mightn't", - "must", - "mustn't", - "shall", - "shan't", - "should", - "shouldn't", - "will", - "won't", - "would", - "wouldn't", - // not modals per se, but modal-like - "do", - "don't", -]; - /// Linter that corrects common noun/verb confusions pub(super) struct GeneralNounInsteadOfVerb { expr: Box, @@ -45,10 +17,21 @@ pub(super) struct GeneralNounInsteadOfVerb { impl Default for GeneralNounInsteadOfVerb { fn default() -> Self { + // Adverbs that can come before verbs but not nouns + // Note: "Sometimes" can come before a noun. + let adverb_of_frequency = |tok: &Token, src: &[char]| { + tok.kind.is_frequency_adverb() + && !tok + .span + .get_content(src) + .eq_ignore_ascii_case_str("sometimes") + }; + let pre_context = FirstMatchOf::new(vec![ Box::new(WordSet::new(PRONOUNS)), - Box::new(WordSet::new(MODAL_VERBS_ETC)), - Box::new(WordSet::new(ADVERBS)), + Box::new(ModalVerb::with_common_errors()), + Box::new(WordSet::new(&["do", "don't", "dont"])), + Box::new(adverb_of_frequency), Box::new(Word::new("to")), ]); @@ -98,13 +81,15 @@ impl ExprLinter for GeneralNounInsteadOfVerb { // If we have the next word token, try to rule out compound nouns if toks.len() > 4 { let following_tok = &toks[4]; - if following_tok.kind.is_noun() && !following_tok.kind.is_preposition() { + if following_tok.kind.is_noun() + && !following_tok.kind.is_proper_noun() + && !following_tok.kind.is_preposition() + { // But first rule out marginal "nouns" - let following_lower = following_tok.span.get_content_string(src).to_lowercase(); - if following_lower != "it" - && following_lower != "me" - && following_lower != "on" - && following_lower != "that" + if !following_tok + .span + .get_content(src) + .eq_any_ignore_ascii_case_str(&["it", "me", "on", "that"]) { return None; }