feat: try to detect space in wrong spot between words

This commit is contained in:
hippietrail 2025-12-20 02:15:26 +08:00
parent 0dc6738905
commit a7f8eb42fa
6 changed files with 242 additions and 6 deletions

View file

@ -1,4 +1,4 @@
54100
54200
# Start of original dictionary import
# combined with dialect spelling dictionary import.
@ -40931,7 +40931,6 @@ quote's
quotidian/JN
quotient/~NSg
qwerty/J # dictionaries prefer QWERTY
r/~VS # !! 'r' is a verb??
rabbet/NgSVGd
rabbi/~NSg
rabbinate/Ng

View file

@ -185,6 +185,7 @@ use super::throw_rubbish::ThrowRubbish;
use super::to_adverb::ToAdverb;
use super::to_two_too::ToTwoToo;
use super::touristic::Touristic;
use super::transposed_space::TransposedSpace;
use super::unclosed_quotes::UnclosedQuotes;
use super::update_place_names::UpdatePlaceNames;
use super::use_genitive::UseGenitive;
@ -706,6 +707,9 @@ impl LintGroup {
);
out.config.set_rule_enabled("DisjointPrefixes", true);
out.add_chunk_expr_linter("TransposedSpace", TransposedSpace::new(dictionary.clone()));
out.config.set_rule_enabled("TransposedSpace", true);
out
}

View file

@ -198,6 +198,7 @@ mod throw_rubbish;
mod to_adverb;
mod to_two_too;
mod touristic;
mod transposed_space;
mod unclosed_quotes;
mod update_place_names;
mod use_genitive;

View file

@ -0,0 +1,181 @@
use crate::{
Lint, Token, TokenStringExt,
expr::{Expr, FirstMatchOf, SequenceExpr},
linting::{ExprLinter, LintKind, Suggestion, expr_linter::Chunk},
spell::Dictionary,
};
/// Linter that looks for a space placed one character too early or too late
/// between two words (e.g. "thec at" or "th ecat" for "the cat").
///
/// `D` is the dictionary implementation used to decide whether the re-split
/// words are real words.
pub struct TransposedSpace<D: Dictionary + 'static> {
/// Pattern returned from `ExprLinter::expr`; it selects the
/// word / whitespace / word sequences that are inspected.
expr: Box<dyn Expr>,
/// Dictionary queried for word membership and for the stored
/// capitalization of each candidate word.
dict: D,
}
impl<D: Dictionary + 'static> TransposedSpace<D> {
    /// Builds the default linter.
    ///
    /// Only sequences where *both* words around the whitespace are
    /// out-of-vocabulary are considered.
    pub fn new(dict: D) -> Self {
        let pattern = FirstMatchOf::new(vec![Box::new(
            SequenceExpr::default().then_oov().t_ws().then_oov(),
        )]);

        Self {
            expr: Box::new(pattern),
            dict,
        }
    }

    /// Builds a more eager variant of the linter.
    ///
    /// In addition to the both-words-unknown case, this also considers
    /// sequences where only one of the two words is out-of-vocabulary.
    /// The alternatives are tried in the order listed.
    pub fn sensitive(dict: D) -> Self {
        let pattern = FirstMatchOf::new(vec![
            // unknown word, then any word
            Box::new(SequenceExpr::default().then_oov().t_ws().then_any_word()),
            // any word, then unknown word
            Box::new(SequenceExpr::default().then_any_word().t_ws().then_oov()),
            // both words unknown
            Box::new(SequenceExpr::default().then_oov().t_ws().then_oov()),
        ]);

        Self {
            expr: Box::new(pattern),
            dict,
        }
    }
}
/// Appends `"<word1> <word2>"` to `values` unless an identical string is
/// already present.
///
/// The comparison is exact (case-sensitive), and the insertion order of
/// previously stored entries is left untouched.
fn keep_unique(values: &mut Vec<String>, word1: &[char], word2: &[char]) {
    // Assemble the candidate in a single buffer: first word, one space, second word.
    let mut candidate = String::new();
    candidate.extend(word1);
    candidate.push(' ');
    candidate.extend(word2);

    let already_present = values.iter().any(|existing| *existing == candidate);
    if already_present {
        return;
    }
    values.push(candidate);
}
impl<D: Dictionary + 'static> ExprLinter for TransposedSpace<D> {
    type Unit = Chunk;

    /// Returns the pattern chosen by the constructor (`new` or `sensitive`).
    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }

    /// Tries to repair a matched `word whitespace word` sequence by moving the
    /// space one character to the left or to the right.
    ///
    /// `toks` is the matched token run and `src` the source text the token
    /// spans index into. The first and last tokens are taken as the two words.
    ///
    /// Returns `None` when the tokens have no span, when either word is empty,
    /// or when neither shifted split yields two dictionary words. Otherwise
    /// returns a `Typo` lint over the whole match with one suggestion per
    /// distinct replacement.
    fn match_to_lint(&self, toks: &[Token], src: &[char]) -> Option<Lint> {
        let toks_span = toks.span()?;

        // "thec" "at" / "th" "ecat"
        let word1 = toks.first()?.span.get_content(src);
        let word2 = toks.last()?.span.get_content(src);

        // "thec" -> "the" + 'c'. `split_last` yields `None` for an empty word
        // instead of panicking on `len() - 1`.
        let (w1_last, w1_start) = word1.split_last()?;
        // "ecat" -> 'e' + "cat"
        let (w2_first, w2_end) = word2.split_first()?;

        // 'c' + "at" -> "cat"
        let mut w1_last_plus_w2 = Vec::with_capacity(word2.len() + 1);
        w1_last_plus_w2.push(*w1_last);
        w1_last_plus_w2.extend_from_slice(word2);

        // "th" + 'e' -> "the"
        let mut w1_plus_w2_first = word1.to_vec();
        w1_plus_w2_first.push(*w2_first);

        let mut values: Vec<String> = Vec::new();

        // Records the split `left right` if both halves are dictionary words.
        // The variant using the dictionary's stored capitalization (where one
        // is returned) is recorded first, followed by the split exactly as
        // typed; `keep_unique` drops duplicates.
        let mut add_split = |left: &[char], right: &[char]| {
            if !(self.dict.contains_word(left) && self.dict.contains_word(right)) {
                return;
            }

            let canon_left = self.dict.get_correct_capitalization_of(left);
            let canon_right = self.dict.get_correct_capitalization_of(right);

            match (canon_left, canon_right) {
                (Some(l), Some(r)) => keep_unique(&mut values, l, r),
                (Some(l), None) => keep_unique(&mut values, l, right),
                (None, Some(r)) => keep_unique(&mut values, left, r),
                (None, None) => {}
            }

            keep_unique(&mut values, left, right);
        };

        // Space one character too late: "thec" "at" -> "the cat"
        add_split(w1_start, w1_last_plus_w2.as_slice());
        // Space one character too early: "th" "ecat" -> "the cat"
        add_split(w1_plus_w2_first.as_slice(), w2_end);

        if values.is_empty() {
            return None;
        }

        // Every candidate is offered as a replacement for the whole match,
        // with the original text passed along as the case template.
        let suggestions = values
            .iter()
            .map(|value| {
                Suggestion::replace_with_match_case(
                    value.chars().collect(),
                    toks_span.get_content(src),
                )
            })
            .collect();

        Some(Lint {
            span: toks_span,
            lint_kind: LintKind::Typo,
            suggestions,
            message: format!(
                "Is the space between `{}` and `{}` one character out of place?",
                word1.iter().collect::<String>(),
                word2.iter().collect::<String>()
            ),
            ..Default::default()
        })
    }

    fn description(&self) -> &str {
        "Looks for a space one character too early or too late between words."
    }
}
#[cfg(test)]
mod tests {
    use super::TransposedSpace;
    use crate::{linting::tests::assert_suggestion_result, spell::FstDictionary};

    /// Runs the default linter (`TransposedSpace::new`) over `input` and
    /// checks that applying its suggestion produces `expected`.
    fn check_default(input: &str, expected: &str) {
        let linter = TransposedSpace::new(FstDictionary::curated());
        assert_suggestion_result(input, linter, expected);
    }

    /// Same as `check_default`, but with the `TransposedSpace::sensitive`
    /// pattern set.
    fn check_sensitive(input: &str, expected: &str) {
        let linter = TransposedSpace::sensitive(FstDictionary::curated());
        assert_suggestion_result(input, linter, expected);
    }

    // Space one character too early: "Th ecat" -> "The cat".
    #[test]
    fn space_too_early() {
        check_sensitive("Th ecat sat on the mat.", "The cat sat on the mat.");
    }

    // Space one character too late: "Thec at" -> "The cat".
    #[test]
    fn space_too_late() {
        check_sensitive("Thec at sat on the mat.", "The cat sat on the mat.");
    }

    // Default pattern set, space too early: "spac eis" -> "space is".
    #[test]
    fn test_early() {
        check_default(
            "Sometimes the spac eis one character early.",
            "Sometimes the space is one character early.",
        );
    }

    // Default pattern set, space too late: "Ands ometimes" -> "And sometimes".
    #[test]
    fn test_late() {
        check_default(
            "Ands ometimes the space is a character late.",
            "And sometimes the space is a character late.",
        );
    }
}

View file

@ -6197,6 +6197,37 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
4486 | “No, r—” corrected the man, “M-a-v-r-o———”
| ^ This word's canonical spelling is all-caps.
Suggest:
- Replace with: “R”
Lint: Spelling (63 priority)
Message: |
4486 | “No, r—” corrected the man, “M-a-v-r-o———”
| ^ Did you mean to spell `r` this way?
Suggest:
- Replace with: “re”
- Replace with: “a”
- Replace with: “e”
Lint: Spelling (63 priority)
Message: |
4486 | “No, r—” corrected the man, “M-a-v-r-o———”
| ^ Did you mean to spell `r` this way?
Suggest:
- Replace with: “re”
- Replace with: “a”
- Replace with: “e”
Lint: Spelling (63 priority)
Message: |
4486 | “No, r—” corrected the man, “M-a-v-r-o———”
@ -6208,6 +6239,26 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
4490 | “r—” said the policeman, “o———”
| ^ This word's canonical spelling is all-caps.
Suggest:
- Replace with: “R”
Lint: Spelling (63 priority)
Message: |
4490 | “r—” said the policeman, “o———”
| ^ Did you mean to spell `r` this way?
Suggest:
- Replace with: “re”
- Replace with: “a”
- Replace with: “e”
Lint: Capitalization (127 priority)
Message: |
4490 | “r—” said the policeman, “o———”

View file

@ -8968,16 +8968,16 @@
# . NPr/VB/J/#r . D/P . NSg/P/#r . . D+ NSg+ VPt N🅪Sg/Vg/J . . . NPr/J/P . . . .
>
#
> “ No , r — ” corrected the man , “ M - a - v - r - o — — — ”
# . NPr/Dq/P . NPr/VB/J . . VP/J D+ NPr/VB/J+ . . NPr/VB/J/#r . D/P . NSg/P/#r . NPr/VB/J . NPr/J/P . . . .
> “ No , r — ” corrected the man , “ M - a - v - r - o — — — ”
# . NPr/Dq/P . NPr/J . . VP/J D+ NPr/VB/J+ . . NPr/VB/J/#r . D/P . NSg/P/#r . NPr/J . NPr/J/P . . . .
>
#
> “ Listen to me ! ” muttered Tom fiercely .
# . NSg/VB P NPr/ISg+ . . VP/J NPr/VB+ R .
>
#
> “ r — ” said the policeman , “ o — — — ”
# . NPr/VB/J . . VP/J D+ NSg+ . . NPr/J/P . . . .
> “ r — ” said the policeman , “ o — — — ”
# . NPr/J . . VP/J D+ NSg+ . . NPr/J/P . . . .
>
#
> “ g — — — ”