mirror of
https://github.com/Automattic/harper.git
synced 2025-12-23 08:48:15 +00:00
chore: add weigh/weight to noun/verb confusion (#2315)
* chore: add weigh/weight to noun/verb confusion * fix: -debug printf
This commit is contained in:
parent
9eefe7336e
commit
69db96b370
5 changed files with 58 additions and 44 deletions
|
|
@ -43701,7 +43701,7 @@ seismologist/NgS
|
|||
seismology/Nmg
|
||||
seize/~VGdSU
|
||||
seizure/~NgSV
|
||||
seldom/~R # removed `J` adjective sense is archaic
|
||||
seldom/~R8 # removed `J` adjective sense is archaic
|
||||
select/~JVSGdev
|
||||
selection/~NwSg
|
||||
selective/~JY
|
||||
|
|
|
|||
|
|
@ -531,6 +531,7 @@ impl SequenceExpr {
|
|||
// Adverbs
|
||||
|
||||
gen_then_from_is!(adverb);
|
||||
gen_then_from_is!(frequency_adverb);
|
||||
|
||||
// Determiners
|
||||
|
||||
|
|
|
|||
|
|
@ -332,7 +332,7 @@ pub mod tests {
|
|||
let transformed_str = transform_nth_str(text, &mut linter, n);
|
||||
|
||||
if transformed_str.as_str() != expected_result {
|
||||
panic!("Expected \"{expected_result}\"\n But got \"{transformed_str}\"");
|
||||
panic!("Expected \"{expected_result}\"\n But got \"{transformed_str}\"");
|
||||
}
|
||||
|
||||
// Applying the suggestions should fix all the lints.
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ pub(crate) const NOUN_VERB_PAIRS: &[(&str, &str)] = &[
|
|||
("emphasis", "emphasize"), // TODO how to handle "emphasise" as well as "emphasize"?
|
||||
("intent", "intend"),
|
||||
// ("proof", "prove"), // "Proof" is also a verb, a synonym of "proofread".
|
||||
("weight", "weigh"),
|
||||
// Add more pairs here as needed
|
||||
];
|
||||
|
||||
|
|
@ -1370,4 +1371,31 @@ mod tests {
|
|||
"Let me give you a piece of advice.",
|
||||
);
|
||||
}
|
||||
|
||||
/// The noun "weight" after "helps you" should be rewritten to the verb "weigh".
#[test]
fn fix_helps_you_weight() {
    // Sentence containing the noun where a verb is required.
    let input =
        "An iOS app that helps you weight small things on the screen of your iPhone / iPad.";
    // The same sentence after the suggestion has been applied.
    let expected =
        "An iOS app that helps you weigh small things on the screen of your iPhone / iPad.";

    assert_suggestion_result(input, NounVerbConfusion::default(), expected);
}
|
||||
|
||||
/// The noun "weight" after "do you" should be rewritten to the verb "weigh".
#[test]
fn fix_do_you_weight() {
    let input = "How much do you weight?";
    let expected = "How much do you weigh?";

    assert_suggestion_result(input, NounVerbConfusion::default(), expected);
}
|
||||
|
||||
/// The noun "weight" directly after the pronoun "you" should be rewritten to "weigh".
#[test]
fn fix_more_than_you_weight() {
    let input = "contributed more than you weight";
    let expected = "contributed more than you weigh";

    assert_suggestion_result(input, NounVerbConfusion::default(), expected);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,43 +1,15 @@
|
|||
use crate::expr::{Expr, FirstMatchOf, LongestMatchOf, SequenceExpr};
|
||||
use crate::linting::expr_linter::Chunk;
|
||||
use crate::linting::{ExprLinter, Lint, LintKind, Suggestion};
|
||||
use crate::patterns::Word;
|
||||
use crate::{CharStringExt, Lrc, Token, patterns::WordSet};
|
||||
use crate::{
|
||||
CharStringExt, Lrc, Token,
|
||||
expr::{Expr, FirstMatchOf, LongestMatchOf, SequenceExpr},
|
||||
linting::{ExprLinter, Lint, LintKind, Suggestion, expr_linter::Chunk},
|
||||
patterns::{ModalVerb, Word, WordSet},
|
||||
};
|
||||
|
||||
use super::super::NOUN_VERB_PAIRS;
|
||||
|
||||
/// Pronouns that may directly precede a verb, but never a noun.
const PRONOUNS: &[&str] = &[
    "he", "I", "it", "she", //
    "they", "we", "who", "you",
];
|
||||
|
||||
/// Adverbs that may directly precede a verb, but never a noun.
///
/// "Sometimes" is left out on purpose: it can also come before a noun.
const ADVERBS: &[&str] = &[
    "always", //
    "never",
    "often",
    "seldom",
];
|
||||
|
||||
/// Modal (and modal-like) verbs that may precede another verb, but never a noun.
///
/// Entries are grouped one base verb per row, with its negated forms beside it.
#[rustfmt::skip]
const MODAL_VERBS_ETC: &[&str] = &[
    "can", "cannot", "can't",
    "could", "couldn't",
    "may",
    "might", "mightn't",
    "must", "mustn't",
    "shall", "shan't",
    "should", "shouldn't",
    "will", "won't",
    "would", "wouldn't",
    // Not modals per se, but modal-like.
    "do", "don't",
];
|
||||
|
||||
/// Linter that corrects common noun/verb confusions
|
||||
pub(super) struct GeneralNounInsteadOfVerb {
|
||||
expr: Box<dyn Expr>,
|
||||
|
|
@ -45,10 +17,21 @@ pub(super) struct GeneralNounInsteadOfVerb {
|
|||
|
||||
impl Default for GeneralNounInsteadOfVerb {
|
||||
fn default() -> Self {
|
||||
// Adverbs that can come before verbs but not nouns
|
||||
// Note: "Sometimes" can come before a noun.
|
||||
let adverb_of_frequency = |tok: &Token, src: &[char]| {
|
||||
tok.kind.is_frequency_adverb()
|
||||
&& !tok
|
||||
.span
|
||||
.get_content(src)
|
||||
.eq_ignore_ascii_case_str("sometimes")
|
||||
};
|
||||
|
||||
let pre_context = FirstMatchOf::new(vec![
|
||||
Box::new(WordSet::new(PRONOUNS)),
|
||||
Box::new(WordSet::new(MODAL_VERBS_ETC)),
|
||||
Box::new(WordSet::new(ADVERBS)),
|
||||
Box::new(ModalVerb::with_common_errors()),
|
||||
Box::new(WordSet::new(&["do", "don't", "dont"])),
|
||||
Box::new(adverb_of_frequency),
|
||||
Box::new(Word::new("to")),
|
||||
]);
|
||||
|
||||
|
|
@ -98,13 +81,15 @@ impl ExprLinter for GeneralNounInsteadOfVerb {
|
|||
// If we have the next word token, try to rule out compound nouns
|
||||
if toks.len() > 4 {
|
||||
let following_tok = &toks[4];
|
||||
if following_tok.kind.is_noun() && !following_tok.kind.is_preposition() {
|
||||
if following_tok.kind.is_noun()
|
||||
&& !following_tok.kind.is_proper_noun()
|
||||
&& !following_tok.kind.is_preposition()
|
||||
{
|
||||
// But first rule out marginal "nouns"
|
||||
let following_lower = following_tok.span.get_content_string(src).to_lowercase();
|
||||
if following_lower != "it"
|
||||
&& following_lower != "me"
|
||||
&& following_lower != "on"
|
||||
&& following_lower != "that"
|
||||
if !following_tok
|
||||
.span
|
||||
.get_content(src)
|
||||
.eq_any_ignore_ascii_case_str(&["it", "me", "on", "that"])
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue