mirror of
https://github.com/Automattic/harper.git
synced 2025-12-23 08:48:15 +00:00
Irregular verbs (#2285)
Some checks are pending
Binaries / harper-cli - macOS-aarch64 (push) Waiting to run
Binaries / harper-cli - Linux-aarch64-GNU (push) Waiting to run
Binaries / harper-cli - Linux-aarch64-musl (push) Waiting to run
Binaries / harper-cli - macOS-x86_64 (push) Waiting to run
Binaries / harper-cli - Linux-x86_64-GNU (push) Waiting to run
Binaries / harper-cli - Linux-x86_64-musl (push) Waiting to run
Binaries / harper-cli - Windows-x86_64 (push) Waiting to run
Binaries / harper-ls - macOS-aarch64 (push) Waiting to run
Binaries / harper-ls - Linux-aarch64-GNU (push) Waiting to run
Binaries / harper-ls - Linux-aarch64-musl (push) Waiting to run
Binaries / harper-ls - macOS-x86_64 (push) Waiting to run
Binaries / harper-ls - Linux-x86_64-GNU (push) Waiting to run
Binaries / harper-ls - Linux-x86_64-musl (push) Waiting to run
Binaries / harper-ls - Windows-x86_64 (push) Waiting to run
Build Web / build-web (push) Waiting to run
Chrome Plugin / chrome-plugin (push) Waiting to run
Just Checks / just check-js (push) Waiting to run
Just Checks / just check-rust (push) Waiting to run
Just Checks / just test-chrome-plugin (push) Waiting to run
Just Checks / just test-firefox-plugin (push) Waiting to run
Just Checks / just test-harperjs (push) Waiting to run
Just Checks / just test-obsidian (push) Waiting to run
Just Checks / just test-rust (push) Waiting to run
Just Checks / just test-vscode (push) Waiting to run
VS Code Plugin / alpine-arm64 (push) Waiting to run
VS Code Plugin / alpine-x64 (push) Waiting to run
VS Code Plugin / darwin-arm64 (push) Waiting to run
VS Code Plugin / linux-arm64 (push) Waiting to run
VS Code Plugin / darwin-x64 (push) Waiting to run
VS Code Plugin / linux-armhf (push) Waiting to run
VS Code Plugin / linux-x64 (push) Waiting to run
VS Code Plugin / win32-arm64 (push) Waiting to run
VS Code Plugin / win32-x64 (push) Waiting to run
WordPress Plugin / wp-plugin (push) Waiting to run
Some checks are pending
Binaries / harper-cli - macOS-aarch64 (push) Waiting to run
Binaries / harper-cli - Linux-aarch64-GNU (push) Waiting to run
Binaries / harper-cli - Linux-aarch64-musl (push) Waiting to run
Binaries / harper-cli - macOS-x86_64 (push) Waiting to run
Binaries / harper-cli - Linux-x86_64-GNU (push) Waiting to run
Binaries / harper-cli - Linux-x86_64-musl (push) Waiting to run
Binaries / harper-cli - Windows-x86_64 (push) Waiting to run
Binaries / harper-ls - macOS-aarch64 (push) Waiting to run
Binaries / harper-ls - Linux-aarch64-GNU (push) Waiting to run
Binaries / harper-ls - Linux-aarch64-musl (push) Waiting to run
Binaries / harper-ls - macOS-x86_64 (push) Waiting to run
Binaries / harper-ls - Linux-x86_64-GNU (push) Waiting to run
Binaries / harper-ls - Linux-x86_64-musl (push) Waiting to run
Binaries / harper-ls - Windows-x86_64 (push) Waiting to run
Build Web / build-web (push) Waiting to run
Chrome Plugin / chrome-plugin (push) Waiting to run
Just Checks / just check-js (push) Waiting to run
Just Checks / just check-rust (push) Waiting to run
Just Checks / just test-chrome-plugin (push) Waiting to run
Just Checks / just test-firefox-plugin (push) Waiting to run
Just Checks / just test-harperjs (push) Waiting to run
Just Checks / just test-obsidian (push) Waiting to run
Just Checks / just test-rust (push) Waiting to run
Just Checks / just test-vscode (push) Waiting to run
VS Code Plugin / alpine-arm64 (push) Waiting to run
VS Code Plugin / alpine-x64 (push) Waiting to run
VS Code Plugin / darwin-arm64 (push) Waiting to run
VS Code Plugin / linux-arm64 (push) Waiting to run
VS Code Plugin / darwin-x64 (push) Waiting to run
VS Code Plugin / linux-armhf (push) Waiting to run
VS Code Plugin / linux-x64 (push) Waiting to run
VS Code Plugin / win32-arm64 (push) Waiting to run
VS Code Plugin / win32-x64 (push) Waiting to run
WordPress Plugin / wp-plugin (push) Waiting to run
* chore: start working on irregular plural module * feat: irregular verb module * fix: `will_non_lemma.rs` shouldn't be included here * fix: `just format` * refactor: in response to PR feedback * fix: implement @elijah's requested changes
This commit is contained in:
parent
4b8c619bb7
commit
66f3e84357
6 changed files with 558 additions and 103 deletions
162
harper-core/irregular_nouns.json
Normal file
162
harper-core/irregular_nouns.json
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
[
|
||||
"// comments can appear in the line before an entry",
|
||||
"// or in place of an entry",
|
||||
["child", "children"],
|
||||
["foot", "feet"],
|
||||
["goose", "geese"],
|
||||
["man", "men"],
|
||||
["mouse", "mice"],
|
||||
["ox", "oxen"],
|
||||
["person", "people"],
|
||||
["seraph", "seraphim"],
|
||||
["woman", "women"],
|
||||
["addendum", "addenda"],
|
||||
["aircraft", "aircraft"],
|
||||
["aircraftman", "aircraftmen"],
|
||||
["aircraftwoman", "aircraftwomen"],
|
||||
["airman", "airmen"],
|
||||
["alderman", "aldermen"],
|
||||
["alga", "algae"],
|
||||
["alveolus", "alveoli"],
|
||||
["anchorman", "anchormen"],
|
||||
["anchorwoman", "anchorwomen"],
|
||||
["atrium", "atria"],
|
||||
["axis", "axes"],
|
||||
["bacillus", "bacilli"],
|
||||
["bacterium", "bacteria"],
|
||||
["bandsman", "bandsmen"],
|
||||
["bargeman", "bargemen"],
|
||||
["bellman", "bellmen"],
|
||||
["biceps", "biceps"],
|
||||
["boatman", "boatmen"],
|
||||
["bronchus", "bronchi"],
|
||||
["businesswoman", "businesswomen"],
|
||||
["cactus", "cacti"],
|
||||
["cameraperson", "camerapeople"],
|
||||
["candelabrum", "candelabra"],
|
||||
["catharsis", "catharses"],
|
||||
["chairman", "chairmen"],
|
||||
["chairwoman", "chairwomen"],
|
||||
["churchwoman", "churchwomen"],
|
||||
["clansman", "clansmen"],
|
||||
["clanswoman", "clanswomen"],
|
||||
["committeeman", "committeemen"],
|
||||
["committeewoman", "committeewomen"],
|
||||
["continuum", "continua"],
|
||||
["corpus", "corpora"],
|
||||
["craftsman", "craftsmen"],
|
||||
["craftswoman", "craftswomen"],
|
||||
["crisis", "crises"],
|
||||
["cyclops", "cyclopes"],
|
||||
["datum", "data"],
|
||||
["diaeresis", "diaereses"],
|
||||
["diagnosis", "diagnoses"],
|
||||
["dominatrix", "dominatrices"],
|
||||
["draughtsman", "draughtsmen"],
|
||||
["draughtswoman", "draughtswomen"],
|
||||
["effluvium", "effluvia"],
|
||||
["emphasis", "emphases"],
|
||||
["esophagus", "esophagi"],
|
||||
["extremum", "extrema"],
|
||||
["fish", "fish"],
|
||||
["footman", "footmen"],
|
||||
["formula", "formulae"],
|
||||
["forum", "fora"],
|
||||
["freeman", "freemen"],
|
||||
["frontiersman", "frontiersmen"],
|
||||
["frontierswoman", "frontierswomen"],
|
||||
["garbageman", "garbagemen"],
|
||||
["genesis", "geneses"],
|
||||
["genie", "genii"],
|
||||
["genius", "genii"],
|
||||
["genus", "genera"],
|
||||
["glissando", "glissandi"],
|
||||
["graffito", "graffiti"],
|
||||
["grandchild", "grandchildren"],
|
||||
["handyman", "handymen"],
|
||||
["hitman", "hitmen"],
|
||||
["houseman", "housemen"],
|
||||
["iceman", "icemen"],
|
||||
["ilium", "ilia"],
|
||||
["index", "indices"],
|
||||
["intermezzo", "intermezzi"],
|
||||
["journeyman", "journeymen"],
|
||||
["labium", "labia"],
|
||||
["lamina", "laminae"],
|
||||
["laundrywoman", "laundrywomen"],
|
||||
["laywoman", "laywomen"],
|
||||
["linesman", "linesmen"],
|
||||
["lira", "lire"],
|
||||
["longshoreman", "longshoremen"],
|
||||
["louse", "lice"],
|
||||
["madman", "madmen"],
|
||||
["mailman", "mailmen"],
|
||||
["memorandum", "memoranda"],
|
||||
["metathesis", "metatheses"],
|
||||
["minimum", "minima"],
|
||||
["mitosis", "mitoses"],
|
||||
["motorman", "motormen"],
|
||||
["muscleman", "musclemen"],
|
||||
["nemesis", "nemeses"],
|
||||
["nightwatchman", "nightwatchmen"],
|
||||
["oarsman", "oarsmen"],
|
||||
["oarswoman", "oarswomen"],
|
||||
["oasis", "oases"],
|
||||
["ombudsman", "ombudsmen"],
|
||||
["optimum", "optima"],
|
||||
["palazzo", "palazzi"],
|
||||
["papyrus", "papyri"],
|
||||
["parenthesis", "parentheses"],
|
||||
["patina", "patinae"],
|
||||
["patrolman", "patrolmen"],
|
||||
["pericardium", "pericardia"],
|
||||
["periphrasis", "periphrases"],
|
||||
["pharynx", "pharynges"],
|
||||
["phenomenon", "phenomena"],
|
||||
["plainclothesman", "plainclothesmen"],
|
||||
["pneumococcus", "pneumococci"],
|
||||
["pressman", "pressmen"],
|
||||
["prosthesis", "prostheses"],
|
||||
["quantum", "quanta"],
|
||||
["radius", "radii"],
|
||||
["radix", "radices"],
|
||||
["repairman", "repairmen"],
|
||||
["salesman", "salesmen"],
|
||||
["saleswoman", "saleswomen"],
|
||||
["sandman", "sandmen"],
|
||||
["schema", "schemata"],
|
||||
["sheep", "sheep"],
|
||||
["shoreman", "shoremen"],
|
||||
["signore", "signori"],
|
||||
["simulacrum", "simulacra"],
|
||||
["solarium", "solaria"],
|
||||
["spokesman", "spokesmen"],
|
||||
["spokesperson", "spokespeople"],
|
||||
["spokeswoman", "spokeswomen"],
|
||||
["statesman", "statesmen"],
|
||||
["stateswoman", "stateswomen"],
|
||||
["steersman", "steersmen"],
|
||||
["stratum", "strata"],
|
||||
["streptococcus", "streptococci"],
|
||||
["succubus", "succubi"],
|
||||
["symbiosis", "symbioses"],
|
||||
["tarsus", "tarsi"],
|
||||
["taxon", "taxa"],
|
||||
["testatrix", "testatrices"],
|
||||
["testis", "testes"],
|
||||
["thesis", "theses"],
|
||||
["thrombosis", "thromboses"],
|
||||
["tooth", "teeth"],
|
||||
["townsman", "townsmen"],
|
||||
["townswoman", "townswomen"],
|
||||
["tradesman", "tradesmen"],
|
||||
["tradeswoman", "tradeswomen"],
|
||||
["uterus", "uteri"],
|
||||
["vertebra", "vertebrae"],
|
||||
["vertex", "vertices"],
|
||||
["vivarium", "vivaria"],
|
||||
["washerwoman", "washerwomen"],
|
||||
["woodlouse", "woodlice"],
|
||||
["workingwoman", "workingwomen"],
|
||||
["workman", "workmen"]
|
||||
]
|
||||
127
harper-core/irregular_verbs.json
Normal file
127
harper-core/irregular_verbs.json
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
[
|
||||
"// comments can appear in the line before an entry",
|
||||
"// or in place of an entry",
|
||||
["arise", "arose", "arisen"],
|
||||
["awake", "awoke", "awoken"],
|
||||
"// be/am/are/is -- was/were -- been",
|
||||
["become", "became", "become"],
|
||||
["begin", "began", "begun"],
|
||||
["bend", "bent", "bent"],
|
||||
["bet", "bet", "bet"],
|
||||
["bid", "bade", "bidden"],
|
||||
["bind", "bound", "bound"],
|
||||
["bite", "bit", "bitten"],
|
||||
["bleed", "bled", "bled"],
|
||||
["blow", "blew", "blown"],
|
||||
["break", "broke", "broken"],
|
||||
["breed", "bred", "bred"],
|
||||
["bring", "brought", "brought"],
|
||||
["build", "built", "built"],
|
||||
["burst", "burst", "burst"],
|
||||
["buy", "bought", "bought"],
|
||||
["catch", "caught", "caught"],
|
||||
["choose", "chose", "chosen"],
|
||||
["come", "came", "come"],
|
||||
["cost", "cost", "cost"],
|
||||
["cut", "cut", "cut"],
|
||||
["dive", "dove", "dove"],
|
||||
["do", "did", "done"],
|
||||
["drink", "drank", "drunk"],
|
||||
["drive", "drove", "driven"],
|
||||
["eat", "ate", "eaten"],
|
||||
["fall", "fell", "fallen"],
|
||||
["feed", "fed", "fed"],
|
||||
["feel", "felt", "felt"],
|
||||
["fight", "fought", "fought"],
|
||||
["find", "found", "found"],
|
||||
["fly", "flew", "flown"],
|
||||
["forget", "forgot", "forgotten"],
|
||||
["forgo", "forwent", "forgone"],
|
||||
["freeze", "froze", "frozen"],
|
||||
"// get -- got -- gotten",
|
||||
["get", "got", "got"],
|
||||
["give", "gave", "given"],
|
||||
["go", "went", "gone"],
|
||||
["grow", "grew", "grown"],
|
||||
["have", "had", "had"],
|
||||
["hear", "heard", "heard"],
|
||||
["hit", "hit", "hit"],
|
||||
["hold", "held", "held"],
|
||||
["hurt", "hurt", "hurt"],
|
||||
["input", "input", "input"],
|
||||
["keep", "kept", "kept"],
|
||||
["know", "knew", "known"],
|
||||
["lay", "laid", "laid"],
|
||||
["lead", "led", "led"],
|
||||
["light", "lit", "lit"],
|
||||
["lose", "lost", "lost"],
|
||||
["make", "made", "made"],
|
||||
["mistake", "mistook", "mistaken"],
|
||||
["output", "output", "output"],
|
||||
["overtake", "overtook", "overtaken"],
|
||||
["overthrow", "overthrew", "overthrown"],
|
||||
["overwrite", "overwrote", "overwritten"],
|
||||
["partake", "partook", "partaken"],
|
||||
["pay", "paid", "paid"],
|
||||
["put", "put", "put"],
|
||||
["read", "read", "read"],
|
||||
["redo", "redid", "redone"],
|
||||
["remake", "remade", "remade"],
|
||||
["reread", "reread", "reread"],
|
||||
["reset", "reset", "reset"],
|
||||
["ride", "rode", "ridden"],
|
||||
["ring", "rang", "rung"],
|
||||
["rise", "rose", "risen"],
|
||||
["run", "ran", "run"],
|
||||
["see", "saw", "seen"],
|
||||
["sell", "sold", "sold"],
|
||||
["send", "sent", "sent"],
|
||||
["set", "set", "set"],
|
||||
["shake", "shook", "shaken"],
|
||||
["shed", "shed", "shed"],
|
||||
["shine", "shone", "shone"],
|
||||
["shoe", "shod", "shod"],
|
||||
["shoot", "shot", "shot"],
|
||||
["show", "showed", "shown"],
|
||||
["shrink", "shrank", "shrunk"],
|
||||
["shut", "shut", "shut"],
|
||||
["sing", "sang", "sung"],
|
||||
"// sink -- sank -- sunken??",
|
||||
["sink", "sank", "sunk"],
|
||||
["sit", "sat", "sat"],
|
||||
["slay", "slew", "slain"],
|
||||
["sleep", "slept", "slept"],
|
||||
["slide", "slid", "slid"],
|
||||
["slit", "slit", "slit"],
|
||||
"// sneak -- sneaked/snuck -- sneaked/snuck",
|
||||
["speak", "spoke", "spoken"],
|
||||
["spin", "spun", "spun"],
|
||||
["spit", "spat", "spat"],
|
||||
["split", "split", "split"],
|
||||
["spread", "spread", "spread"],
|
||||
["spring", "sprang", "sprung"],
|
||||
["stand", "stood", "stood"],
|
||||
["steal", "stole", "stolen"],
|
||||
["stick", "stuck", "stuck"],
|
||||
["sting", "stung", "stung"],
|
||||
["stink", "stank", "stunk"],
|
||||
["stride", "strode", "stridden"],
|
||||
["strike", "struck", "stricken"],
|
||||
["string", "strung", "strung"],
|
||||
["sew", "sewed", "sewn"],
|
||||
["swear", "swore", "sworn"],
|
||||
["swim", "swam", "swum"],
|
||||
["swing", "swung", "swung"],
|
||||
["take", "took", "taken"],
|
||||
["teach", "taught", "taught"],
|
||||
["tear", "tore", "torn"],
|
||||
["think", "thought", "thought"],
|
||||
["throw", "threw", "thrown"],
|
||||
["tread", "trod", "trodden"],
|
||||
["undo", "undid", "undone"],
|
||||
["wake", "woke", "woken"],
|
||||
["wear", "wore", "worn"],
|
||||
["weave", "wove", "woven"],
|
||||
["wind", "wound", "wound"],
|
||||
["write", "wrote", "written"]
|
||||
]
|
||||
121
harper-core/src/irregular_nouns.rs
Normal file
121
harper-core/src/irregular_nouns.rs
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
use lazy_static::lazy_static;
|
||||
use serde::Deserialize;
|
||||
use std::sync::Arc;
|
||||
|
||||
type Noun = (String, String);
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct IrregularNouns {
|
||||
nouns: Vec<Noun>,
|
||||
}
|
||||
|
||||
/// The uncached function that is used to produce the original copy of the
|
||||
/// irregular noun table.
|
||||
fn uncached_inner_new() -> Arc<IrregularNouns> {
|
||||
IrregularNouns::from_json_file(include_str!("../irregular_nouns.json"))
|
||||
.map(Arc::new)
|
||||
.unwrap_or_else(|e| panic!("Failed to load irregular noun table: {}", e))
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref NOUNS: Arc<IrregularNouns> = uncached_inner_new();
|
||||
}
|
||||
|
||||
impl IrregularNouns {
|
||||
pub fn new() -> Self {
|
||||
Self { nouns: vec![] }
|
||||
}
|
||||
|
||||
pub fn from_json_file(json: &str) -> Result<Self, serde_json::Error> {
|
||||
// Deserialize into Vec<serde_json::Value> to handle mixed types
|
||||
let values: Vec<serde_json::Value> =
|
||||
serde_json::from_str(json).expect("Failed to parse irregular nouns JSON");
|
||||
|
||||
let mut nouns = Vec::new();
|
||||
|
||||
for value in values {
|
||||
match value {
|
||||
serde_json::Value::Array(arr) if arr.len() == 2 => {
|
||||
// Handle array of 2 strings
|
||||
if let (Some(singular), Some(plural)) = (arr[0].as_str(), arr[1].as_str()) {
|
||||
nouns.push((singular.to_string(), plural.to_string()));
|
||||
}
|
||||
}
|
||||
// Strings are used for comments to guide contributors editing the file
|
||||
serde_json::Value::String(_) => {}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Self { nouns })
|
||||
}
|
||||
|
||||
pub fn curated() -> Arc<Self> {
|
||||
(*NOUNS).clone()
|
||||
}
|
||||
|
||||
pub fn get_plural_for_singular(&self, singular: &str) -> Option<&str> {
|
||||
self.nouns
|
||||
.iter()
|
||||
.find(|(sg, _)| sg.eq_ignore_ascii_case(singular))
|
||||
.map(|(_, pl)| pl.as_str())
|
||||
}
|
||||
|
||||
pub fn get_singular_for_plural(&self, plural: &str) -> Option<&str> {
|
||||
self.nouns
|
||||
.iter()
|
||||
.find(|(_, pl)| pl.eq_ignore_ascii_case(plural))
|
||||
.map(|(sg, _)| sg.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for IrregularNouns {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::IrregularNouns;

    #[test]
    fn can_find_irregular_plural_for_singular_lowercase() {
        let table = IrregularNouns::curated();
        assert_eq!(table.get_plural_for_singular("man"), Some("men"));
    }

    #[test]
    fn can_find_irregular_plural_for_singular_uppercase() {
        let table = IrregularNouns::curated();
        assert_eq!(table.get_plural_for_singular("WOMAN"), Some("women"));
    }

    #[test]
    fn can_find_singular_for_irregular_plural() {
        let table = IrregularNouns::curated();
        assert_eq!(table.get_singular_for_plural("children"), Some("child"));
    }

    #[test]
    fn cant_find_regular_plural() {
        let table = IrregularNouns::curated();
        assert_eq!(table.get_plural_for_singular("car"), None);
    }

    #[test]
    fn cant_find_non_noun() {
        let table = IrregularNouns::curated();
        assert_eq!(table.get_plural_for_singular("the"), None);
    }
}
|
||||
120
harper-core/src/irregular_verbs.rs
Normal file
120
harper-core/src/irregular_verbs.rs
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
use lazy_static::lazy_static;
|
||||
use serde::Deserialize;
|
||||
use std::sync::Arc;
|
||||
|
||||
type Verb = (String, String, String);
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct IrregularVerbs {
|
||||
verbs: Vec<Verb>,
|
||||
}
|
||||
|
||||
/// The uncached function that is used to produce the original copy of the
|
||||
/// irregular verb table.
|
||||
fn uncached_inner_new() -> Arc<IrregularVerbs> {
|
||||
IrregularVerbs::from_json_file(include_str!("../irregular_verbs.json"))
|
||||
.map(Arc::new)
|
||||
.unwrap_or_else(|e| panic!("Failed to load irregular verb table: {}", e))
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref VERBS: Arc<IrregularVerbs> = uncached_inner_new();
|
||||
}
|
||||
|
||||
impl IrregularVerbs {
|
||||
pub fn new() -> Self {
|
||||
Self { verbs: vec![] }
|
||||
}
|
||||
|
||||
pub fn from_json_file(json: &str) -> Result<Self, serde_json::Error> {
|
||||
// Deserialize into Vec<serde_json::Value> to handle mixed types
|
||||
let values: Vec<serde_json::Value> =
|
||||
serde_json::from_str(json).expect("Failed to parse irregular verbs JSON");
|
||||
|
||||
let mut verbs = Vec::new();
|
||||
|
||||
for value in values {
|
||||
match value {
|
||||
serde_json::Value::Array(arr) if arr.len() == 3 => {
|
||||
// Handle array of 3 strings
|
||||
if let (Some(lemma), Some(preterite), Some(past_participle)) =
|
||||
(arr[0].as_str(), arr[1].as_str(), arr[2].as_str())
|
||||
{
|
||||
verbs.push((
|
||||
lemma.to_string(),
|
||||
preterite.to_string(),
|
||||
past_participle.to_string(),
|
||||
));
|
||||
}
|
||||
}
|
||||
// Strings are used for comments to guide contributors editing the file
|
||||
serde_json::Value::String(_) => {}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Self { verbs })
|
||||
}
|
||||
|
||||
pub fn curated() -> Arc<Self> {
|
||||
(*VERBS).clone()
|
||||
}
|
||||
|
||||
pub fn get_past_participle_for_preterite(&self, preterite: &str) -> Option<&str> {
|
||||
self.verbs
|
||||
.iter()
|
||||
.find(|(_, pt, _)| pt.eq_ignore_ascii_case(preterite))
|
||||
.map(|(_, _, pp)| pp.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for IrregularVerbs {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::IrregularVerbs;

    #[test]
    fn can_find_irregular_past_participle_for_preterite_lowercase() {
        let table = IrregularVerbs::curated();
        assert_eq!(
            table.get_past_participle_for_preterite("arose"),
            Some("arisen")
        );
    }

    #[test]
    fn can_find_irregular_past_participle_for_preterite_uppercase() {
        let table = IrregularVerbs::curated();
        assert_eq!(
            table.get_past_participle_for_preterite("WENT"),
            Some("gone")
        );
    }

    #[test]
    fn can_find_irregular_past_participle_same_as_past_tense() {
        let table = IrregularVerbs::curated();
        assert_eq!(
            table.get_past_participle_for_preterite("taught"),
            Some("taught")
        );
    }

    #[test]
    fn cant_find_regular_past_participle() {
        let table = IrregularVerbs::curated();
        assert_eq!(table.get_past_participle_for_preterite("walked"), None);
    }

    #[test]
    fn cant_find_non_verb() {
        let table = IrregularVerbs::curated();
        assert_eq!(table.get_past_participle_for_preterite("the"), None);
    }
}
|
||||
|
|
@ -11,6 +11,8 @@ mod edit_distance;
|
|||
pub mod expr;
|
||||
mod fat_token;
|
||||
mod ignored_lints;
|
||||
mod irregular_nouns;
|
||||
mod irregular_verbs;
|
||||
pub mod language_detection;
|
||||
mod lexing;
|
||||
pub mod linting;
|
||||
|
|
@ -42,6 +44,8 @@ pub use dict_word_metadata_orthography::{OrthFlags, Orthography};
|
|||
pub use document::Document;
|
||||
pub use fat_token::{FatStringToken, FatToken};
|
||||
pub use ignored_lints::{IgnoredLints, LintContext};
|
||||
pub use irregular_nouns::IrregularNouns;
|
||||
pub use irregular_verbs::IrregularVerbs;
|
||||
use linting::Lint;
|
||||
pub use mask::{Mask, Masker};
|
||||
pub use number::{Number, OrdinalSuffix};
|
||||
|
|
|
|||
|
|
@ -1,82 +1,12 @@
|
|||
use crate::linting::expr_linter::Chunk;
|
||||
use crate::{
|
||||
Token,
|
||||
char_string::CharStringExt,
|
||||
expr::{All, Expr, FirstMatchOf, SequenceExpr},
|
||||
irregular_verbs::IrregularVerbs,
|
||||
linting::{ExprLinter, Lint, LintKind, Suggestion},
|
||||
patterns::{InflectionOfBe, WordSet},
|
||||
};
|
||||
|
||||
/// Maps common irregular verbs between their simple past and past participle forms.
|
||||
const IRREGULAR_VERBS: &[(&str, &str)] = &[
|
||||
("arose", "arisen"),
|
||||
("ate", "eaten"),
|
||||
("awoke", "awoken"),
|
||||
("bade", "bidden"),
|
||||
("became", "become"),
|
||||
("began", "begun"),
|
||||
("bit", "bitten"),
|
||||
("blew", "blown"),
|
||||
("bought", "bought"),
|
||||
("brang", "brung"),
|
||||
("broke", "broken"),
|
||||
("brought", "brought"),
|
||||
("came", "come"),
|
||||
("chose", "chosen"),
|
||||
("did", "done"),
|
||||
("drank", "drunk"),
|
||||
("drove", "driven"),
|
||||
("fell", "fallen"),
|
||||
("felt", "felt"),
|
||||
("flew", "flown"),
|
||||
("forgot", "forgotten"),
|
||||
("forwent", "forgone"),
|
||||
("gave", "given"),
|
||||
("grew", "grown"),
|
||||
("had", "had"),
|
||||
("heard", "heard"),
|
||||
("hit", "hit"),
|
||||
("input", "input"),
|
||||
("knew", "known"),
|
||||
("led", "led"),
|
||||
("mistook", "mistaken"),
|
||||
("output", "output"),
|
||||
("overtook", "overtaken"),
|
||||
("paid", "paid"),
|
||||
("partook", "partaken"),
|
||||
// proved, proved/proven
|
||||
("put", "put"),
|
||||
("ran", "run"),
|
||||
("rang", "rung"),
|
||||
("read", "read"),
|
||||
("reset", "reset"),
|
||||
("rode", "ridden"),
|
||||
("rose", "risen"),
|
||||
("sang", "sung"),
|
||||
("sank", "sunken"),
|
||||
("saw", "seen"),
|
||||
("set", "set"),
|
||||
("sewed", "sewn"),
|
||||
("slew", "slain"),
|
||||
("slid", "slid"),
|
||||
("spoke", "spoken"),
|
||||
("sprang", "sprung"),
|
||||
("stank", "stunk"),
|
||||
("stole", "stolen"),
|
||||
("stood", "stood"),
|
||||
("swam", "swum"),
|
||||
("swore", "sworn"),
|
||||
("thought", "thought"),
|
||||
("trod", "trodden"),
|
||||
("took", "taken"),
|
||||
// was, been
|
||||
// were, been
|
||||
("went", "gone"),
|
||||
("woke", "woken"),
|
||||
("wove", "woven"),
|
||||
("wrote", "written"),
|
||||
];
|
||||
|
||||
/// Corrects simple past tense verbs to past participle after auxiliary verbs like "have" or "be".
|
||||
pub struct SimplePastToPastParticiple {
|
||||
expr: Box<dyn Expr>,
|
||||
|
|
@ -141,41 +71,32 @@ impl ExprLinter for SimplePastToPastParticiple {
|
|||
|
||||
let verb_tok = &toks[2];
|
||||
|
||||
let verb_ch = verb_tok.span.get_content(src);
|
||||
if !IRREGULAR_VERBS
|
||||
.iter()
|
||||
.any(|(t, p)| verb_ch.eq_ignore_ascii_case_str(t) && p != t)
|
||||
let simple_past = verb_tok.span.get_content_string(src);
|
||||
|
||||
if let Some(past_participle) = IrregularVerbs::curated()
|
||||
.get_past_participle_for_preterite(&simple_past)
|
||||
.filter(|pp| pp != &simple_past)
|
||||
{
|
||||
return None;
|
||||
}
|
||||
let suggestions = vec![Suggestion::replace_with_match_case(
|
||||
past_participle.chars().collect(),
|
||||
verb_tok.span.get_content(src),
|
||||
)];
|
||||
|
||||
let (simple_past, past_participle) = IRREGULAR_VERBS
|
||||
.iter()
|
||||
.find(|(simple_past, _)| {
|
||||
verb_tok
|
||||
.span
|
||||
.get_content(src)
|
||||
.eq_ignore_ascii_case_str(simple_past)
|
||||
let message = format!(
|
||||
"Use the past participle `{}` instead of `{}` when using compound tenses or passive voice.",
|
||||
past_participle, simple_past
|
||||
);
|
||||
|
||||
Some(Lint {
|
||||
span: verb_tok.span,
|
||||
lint_kind: LintKind::Grammar,
|
||||
suggestions,
|
||||
message,
|
||||
..Default::default()
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let suggestions = vec![Suggestion::replace_with_match_case(
|
||||
past_participle.chars().collect(),
|
||||
verb_tok.span.get_content(src),
|
||||
)];
|
||||
|
||||
let message = format!(
|
||||
"Use the past participle `{}` instead of `{}` when using compound tenses or passive voice.",
|
||||
past_participle, simple_past
|
||||
);
|
||||
|
||||
Some(Lint {
|
||||
span: verb_tok.span,
|
||||
lint_kind: LintKind::Grammar,
|
||||
suggestions,
|
||||
message,
|
||||
..Default::default()
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue