mirror of
https://github.com/Automattic/harper.git
synced 2025-07-07 21:15:01 +00:00
feat: in the cards ⇔ on the cards (#1454)
* feat: in the cards ⇔ on the cards * feat: correct wrong regional variant of in/on the cards * fix(core): `WordSet` now supports typographic punct * fix: appease clippy * fix: add colours for new `Regionalism` lint kind Also adds new colours for lint kinds that were sharing colours with other lint kinds As a bonus I sorted them by lint kind and added colour descriptions in comments. * chore: `just format` --------- Co-authored-by: Elijah Potter <me@elijahpotter.dev>
This commit is contained in:
parent
a8983e3f8a
commit
c69774dcd2
8 changed files with 343 additions and 35 deletions
273
harper-core/src/linting/in_on_the_cards.rs
Normal file
273
harper-core/src/linting/in_on_the_cards.rs
Normal file
|
@ -0,0 +1,273 @@
|
|||
use crate::{
|
||||
Dialect, Token,
|
||||
expr::{Expr, FixedPhrase, LongestMatchOf, SequenceExpr},
|
||||
linting::{LintKind, Suggestion},
|
||||
patterns::{InflectionOfBe, WordSet},
|
||||
};
|
||||
|
||||
use super::{ExprLinter, Lint};
|
||||
|
||||
pub struct InOnTheCards {
|
||||
expr: Box<dyn Expr>,
|
||||
}
|
||||
|
||||
impl InOnTheCards {
|
||||
pub fn new(dialect: Dialect) -> Self {
|
||||
// Quick research suggested that Australian and Canadian English agree with American English.
|
||||
let preposition = match dialect {
|
||||
Dialect::British => "in",
|
||||
_ => "on",
|
||||
};
|
||||
|
||||
let pre_context = LongestMatchOf::new(vec![
|
||||
Box::new(InflectionOfBe::new()),
|
||||
Box::new(WordSet::new(&[
|
||||
"isn't", "it's", "wasn't", "weren't", "not", "isnt", "its", "wasnt", "werent",
|
||||
])),
|
||||
]);
|
||||
|
||||
let expr = SequenceExpr::default()
|
||||
.then(pre_context)
|
||||
.t_ws()
|
||||
.t_aco(preposition)
|
||||
.then(FixedPhrase::from_phrase(" the cards"));
|
||||
|
||||
Self {
|
||||
expr: Box::new(expr),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprLinter for InOnTheCards {
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, toks: &[Token], src: &[char]) -> Option<Lint> {
|
||||
let prep_span = toks[2].span;
|
||||
let prep = prep_span.get_content(src);
|
||||
|
||||
let sugg = Suggestion::ReplaceWith(
|
||||
[
|
||||
match prep[0] {
|
||||
'i' => 'o',
|
||||
'o' => 'i',
|
||||
'I' => 'O',
|
||||
'O' => 'I',
|
||||
_ => return None,
|
||||
},
|
||||
prep[1],
|
||||
]
|
||||
.to_vec(),
|
||||
);
|
||||
|
||||
let message = "Corrects either `in the cards` or `on the cards` to the other, depending on the dialect.".into();
|
||||
|
||||
Some(Lint {
|
||||
span: prep_span,
|
||||
lint_kind: LintKind::Regionalism,
|
||||
suggestions: vec![sugg],
|
||||
message,
|
||||
priority: 63,
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Corrects either `in the cards` or `on the cards` to the other, depending on the dialect."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::{
        Dialect,
        linting::{
            InOnTheCards,
            tests::{assert_lint_count, assert_suggestion_result},
        },
    };

    /// Runs `assert_suggestion_result` with an American-dialect linter.
    fn american_fixes(before: &str, after: &str) {
        assert_suggestion_result(before, InOnTheCards::new(Dialect::American), after);
    }

    /// Runs `assert_suggestion_result` with a British-dialect linter.
    fn british_fixes(before: &str, after: &str) {
        assert_suggestion_result(before, InOnTheCards::new(Dialect::British), after);
    }

    /// Asserts that a linter for `dialect` reports zero lints on `text`.
    fn leaves_alone(dialect: Dialect, text: &str) {
        assert_lint_count(text, InOnTheCards::new(dialect), 0);
    }

    // On the cards

    #[test]
    fn correct_are_on_for_american() {
        american_fixes(
            "Both these features are on the cards, but for now we want to let users know if they have requested an invalid example.",
            "Both these features are in the cards, but for now we want to let users know if they have requested an invalid example.",
        );
    }

    #[test]
    fn dont_correct_is_on_for_british() {
        leaves_alone(Dialect::British, "Yes, I think this is on the cards.");
    }

    #[test]
    fn correct_not_on_for_american() {
        american_fixes(
            "If a permanent unique identifier is not on the cards any time soon for WebHID, we should consider a WebUSB alternative.",
            "If a permanent unique identifier is not in the cards any time soon for WebHID, we should consider a WebUSB alternative.",
        );
    }

    #[test]
    fn correct_be_on_for_american() {
        american_fixes(
            "a full breach of genomics (patient?) data can be on the cards since S3 AWS bucket credentials can be slurped from the process's memory",
            "a full breach of genomics (patient?) data can be in the cards since S3 AWS bucket credentials can be slurped from the process's memory",
        );
    }

    #[test]
    fn correct_was_on_for_american() {
        american_fixes(
            "Virtualising the message summaries ObservableCollection was on the cards so I also take note of your last point.",
            "Virtualising the message summaries ObservableCollection was in the cards so I also take note of your last point.",
        );
    }

    #[test]
    fn correct_isnt_on_no_apostrophe_for_american() {
        american_fixes(
            "parallelising that part isnt on the cards since there would be no noticeable ...",
            "parallelising that part isnt in the cards since there would be no noticeable ...",
        );
    }

    #[test]
    fn correct_its_on_for_american() {
        american_fixes(
            "Regarding extensive documentation, as mentioned, its on the cards, project being sponsored by the aforementioned organisations.",
            "Regarding extensive documentation, as mentioned, its in the cards, project being sponsored by the aforementioned organisations.",
        );
    }

    #[test]
    fn correct_were_on_for_american() {
        american_fixes(
            "lots of high altitudes were on the cards again",
            "lots of high altitudes were in the cards again",
        );
    }

    #[test]
    fn correct_isnt_on_for_american() {
        american_fixes(
            "downgrading to an end-of-life operating system isn't on the cards",
            "downgrading to an end-of-life operating system isn't in the cards",
        );
    }

    #[test]
    fn correct_wasnt_on_for_american() {
        american_fixes(
            "it's only a middleground for an org because passwordless wasn't on the cards previously",
            "it's only a middleground for an org because passwordless wasn't in the cards previously",
        );
    }

    // In the cards

    #[test]
    fn correct_was_in_for_british() {
        british_fixes(
            "Just wondering if it was in the cards or not for something like the Quest3 to get support in the future.",
            "Just wondering if it was on the cards or not for something like the Quest3 to get support in the future.",
        );
    }

    #[test]
    fn dont_correct_is_in_for_american() {
        leaves_alone(Dialect::American, "Not sure if such a project is in the cards");
    }

    #[test]
    fn correct_not_in_for_british() {
        british_fixes(
            "Is that just not in the cards for WASM at this time?",
            "Is that just not on the cards for WASM at this time?",
        );
    }

    #[test]
    fn correct_be_in_for_british() {
        british_fixes("Would this be in the cards?", "Would this be on the cards?");
    }

    #[test]
    fn correct_are_in_for_british() {
        british_fixes(
            "Manifest files are in the cards but haven't been implemented yet.",
            "Manifest files are on the cards but haven't been implemented yet.",
        );
    }

    #[test]
    fn correct_its_in_for_british() {
        british_fixes(
            "As far as an error, that probably would be helpful but doesn't sound like its in the cards.",
            "As far as an error, that probably would be helpful but doesn't sound like its on the cards.",
        );
    }

    #[test]
    fn correct_were_in_for_british() {
        british_fixes(
            "a year or two given the major overhauls that were in the cards at the time",
            "a year or two given the major overhauls that were on the cards at the time",
        );
    }

    #[test]
    fn correct_isnt_in_for_british() {
        british_fixes(
            "I'm going to close this as opting out of the installation framework that Electron gives us isn't in the cards for the project at this time.",
            "I'm going to close this as opting out of the installation framework that Electron gives us isn't on the cards for the project at this time.",
        );
    }

    // Uses a typographic apostrophe (U+2019) in the contraction.
    #[test]
    fn correct_wasnt_in_for_british() {
        british_fixes(
            "doing something better than just swapping our internal log package for glog wasn’t in the cards back then",
            "doing something better than just swapping our internal log package for glog wasn’t on the cards back then",
        );
    }

    #[test]
    fn correct_werent_in_for_british() {
        british_fixes(
            "I had thought stacked borrows was mostly in a final tweaking phase and major changes weren't in the cards.",
            "I had thought stacked borrows was mostly in a final tweaking phase and major changes weren't on the cards.",
        );
    }
}
|
|
@ -44,6 +44,7 @@ use super::hereby::Hereby;
|
|||
use super::hop_hope::HopHope;
|
||||
use super::how_to::HowTo;
|
||||
use super::hyphenate_number_day::HyphenateNumberDay;
|
||||
use super::in_on_the_cards::InOnTheCards;
|
||||
use super::inflected_verb_after_to::InflectedVerbAfterTo;
|
||||
use super::its_contraction::ItsContraction;
|
||||
use super::left_right_hand::LeftRightHand;
|
||||
|
@ -460,6 +461,9 @@ impl LintGroup {
|
|||
);
|
||||
out.config.set_rule_enabled("InflectedVerbAfterTo", true);
|
||||
|
||||
out.add("InOnTheCards", InOnTheCards::new(dialect));
|
||||
out.config.set_rule_enabled("InOnTheCards", true);
|
||||
|
||||
out.add(
|
||||
"SentenceCapitalization",
|
||||
SentenceCapitalization::new(dictionary.clone()),
|
||||
|
|
|
@ -16,6 +16,7 @@ pub enum LintKind {
|
|||
Punctuation,
|
||||
Readability,
|
||||
Redundancy,
|
||||
Regionalism,
|
||||
Repetition,
|
||||
/// This should only be used by linters doing spellcheck on individual words.
|
||||
Spelling,
|
||||
|
@ -32,6 +33,7 @@ impl LintKind {
|
|||
"Miscellaneous" => LintKind::Miscellaneous,
|
||||
"Readability" => LintKind::Readability,
|
||||
"Redundancy" => LintKind::Redundancy,
|
||||
"Regionalism" => LintKind::Regionalism,
|
||||
"Repetition" => LintKind::Repetition,
|
||||
"Spelling" => LintKind::Spelling,
|
||||
"Style" => LintKind::Style,
|
||||
|
@ -50,6 +52,7 @@ impl LintKind {
|
|||
LintKind::Punctuation => "Punctuation",
|
||||
LintKind::Readability => "Readability",
|
||||
LintKind::Redundancy => "Redundancy",
|
||||
LintKind::Regionalism => "Regionalism",
|
||||
LintKind::Repetition => "Repetition",
|
||||
LintKind::Spelling => "Spelling",
|
||||
LintKind::Style => "Style",
|
||||
|
@ -69,6 +72,7 @@ impl Display for LintKind {
|
|||
LintKind::Punctuation => "Punctuation",
|
||||
LintKind::Readability => "Readability",
|
||||
LintKind::Redundancy => "Redundancy",
|
||||
LintKind::Regionalism => "Regionalism",
|
||||
LintKind::Repetition => "Repetition",
|
||||
LintKind::Spelling => "Spelling",
|
||||
LintKind::Style => "Style",
|
||||
|
|
|
@ -38,6 +38,7 @@ mod hereby;
|
|||
mod hop_hope;
|
||||
mod how_to;
|
||||
mod hyphenate_number_day;
|
||||
mod in_on_the_cards;
|
||||
mod inflected_verb_after_to;
|
||||
mod initialism_linter;
|
||||
mod initialisms;
|
||||
|
@ -138,6 +139,7 @@ pub use hereby::Hereby;
|
|||
pub use hop_hope::HopHope;
|
||||
pub use how_to::HowTo;
|
||||
pub use hyphenate_number_day::HyphenateNumberDay;
|
||||
pub use in_on_the_cards::InOnTheCards;
|
||||
pub use inflected_verb_after_to::InflectedVerbAfterTo;
|
||||
pub use initialism_linter::InitialismLinter;
|
||||
pub use its_contraction::ItsContraction;
|
||||
|
|
|
@ -4,7 +4,7 @@ use crate::{CharString, Token};
|
|||
|
||||
/// Matches a predefined word.
|
||||
///
|
||||
/// Note that any capitalization of the contained words will result in a match.
|
||||
/// Note that any capitalization of the contained word will result in a match.
|
||||
#[derive(Clone)]
|
||||
pub struct Word {
|
||||
word: CharString,
|
||||
|
|
|
@ -50,10 +50,20 @@ impl SingleTokenPattern for WordSet {
|
|||
continue;
|
||||
}
|
||||
|
||||
/// Maps typographic punctuation to its plain ASCII counterpart.
///
/// Curly/fullwidth apostrophes become `'`, curly/fullwidth double quotes
/// become `"`, and en dash, em dash, minus sign and fullwidth hyphen-minus
/// become `-`. Every other character is returned unchanged.
fn canonical(c: &char) -> char {
    let ch = *c;

    // Apostrophe-like: U+2018, U+2019, U+02BC, U+FF07.
    if matches!(ch, '\u{2018}' | '\u{2019}' | '\u{02BC}' | '\u{FF07}') {
        return '\'';
    }

    // Double-quote-like: U+201C, U+201D, U+FF02.
    if matches!(ch, '\u{201C}' | '\u{201D}' | '\u{FF02}') {
        return '"';
    }

    // Hyphen-like: U+2013, U+2014, U+2212, U+FF0D.
    if matches!(ch, '\u{2013}' | '\u{2014}' | '\u{2212}' | '\u{FF0D}') {
        return '-';
    }

    ch
}
|
||||
|
||||
let partial_match = tok_chars
|
||||
.iter()
|
||||
.zip(word)
|
||||
.all(|(a, b)| a.eq_ignore_ascii_case(b));
|
||||
.map(canonical)
|
||||
.zip(word.iter().map(canonical))
|
||||
.all(|(a, b)| a.eq_ignore_ascii_case(&b));
|
||||
|
||||
if partial_match {
|
||||
return true;
|
||||
|
@ -91,4 +101,15 @@ mod tests {
|
|||
|
||||
assert_eq!(matches, vec![Span::new(6, 7), Span::new(12, 13)]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn supports_typographic_apostrophes() {
|
||||
let set = WordSet::new(&["They're"]);
|
||||
|
||||
let doc = Document::new_markdown_default_curated("They’re");
|
||||
|
||||
let matches = set.find_all_matches_in_doc(&doc);
|
||||
|
||||
assert_eq!(matches, vec![Span::new(0, 1)]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,25 +1,27 @@
|
|||
export default function lintKindColor(lintKindKey: string): string {
|
||||
switch (lintKindKey) {
|
||||
case 'Spelling':
|
||||
return '#EE4266';
|
||||
case 'Capitalization':
|
||||
return '#540D6E';
|
||||
case 'Style':
|
||||
return '#FFD23F';
|
||||
case 'Formatting':
|
||||
return '#540D6E';
|
||||
case 'Repetition':
|
||||
return '#3BCEAC';
|
||||
return '#540D6E'; // Deep purple
|
||||
case 'Enhancement':
|
||||
return '#0EAD69';
|
||||
case 'Readability':
|
||||
return '#0EAD69';
|
||||
case 'WordChoice':
|
||||
return '#0EAD69';
|
||||
return '#0EAD69'; // Green
|
||||
case 'Formatting':
|
||||
return '#7D3C98'; // Amethyst purple
|
||||
case 'Miscellaneous':
|
||||
return '#3BCEAC';
|
||||
return '#3BCEAC'; // Turquoise
|
||||
case 'Punctuation':
|
||||
return '#D4850F';
|
||||
return '#D4850F'; // Dark orange
|
||||
case 'Readability':
|
||||
return '#2E8B57'; // Sea green
|
||||
case 'Regionalism':
|
||||
return '#C061CB'; // Vibrant purple
|
||||
case 'Repetition':
|
||||
return '#00A67C'; // Green-cyan
|
||||
case 'Spelling':
|
||||
return '#EE4266'; // Pink-red
|
||||
case 'Style':
|
||||
return '#FFD23F'; // Yellow
|
||||
case 'WordChoice':
|
||||
return '#228B22'; // Forest green
|
||||
default:
|
||||
throw new Error(`Unexpected lint kind: ${lintKindKey}`);
|
||||
}
|
||||
|
|
|
@ -1,25 +1,27 @@
|
|||
export default function lintKindColor(lintKindKey: string): string {
|
||||
switch (lintKindKey) {
|
||||
case 'Spelling':
|
||||
return '#EE4266';
|
||||
case 'Capitalization':
|
||||
return '#540D6E';
|
||||
case 'Style':
|
||||
return '#FFD23F';
|
||||
case 'Formatting':
|
||||
return '#540D6E';
|
||||
case 'Repetition':
|
||||
return '#3BCEAC';
|
||||
return '#540D6E'; // Deep purple
|
||||
case 'Enhancement':
|
||||
return '#0EAD69';
|
||||
case 'Readability':
|
||||
return '#0EAD69';
|
||||
case 'WordChoice':
|
||||
return '#0EAD69';
|
||||
return '#0EAD69'; // Green
|
||||
case 'Formatting':
|
||||
return '#7D3C98'; // Amethyst purple
|
||||
case 'Miscellaneous':
|
||||
return '#3BCEAC';
|
||||
return '#3BCEAC'; // Turquoise
|
||||
case 'Punctuation':
|
||||
return '#D4850F';
|
||||
return '#D4850F'; // Dark orange
|
||||
case 'Readability':
|
||||
return '#2E8B57'; // Sea green
|
||||
case 'Regionalism':
|
||||
return '#C061CB'; // Vibrant purple
|
||||
case 'Repetition':
|
||||
return '#00A67C'; // Green-cyan
|
||||
case 'Spelling':
|
||||
return '#EE4266'; // Pink-red
|
||||
case 'Style':
|
||||
return '#FFD23F'; // Yellow
|
||||
case 'WordChoice':
|
||||
return '#228B22'; // Forest green
|
||||
default:
|
||||
throw new Error(`Unexpected lint kind: ${lintKindKey}`);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue