mirror of
https://github.com/Automattic/harper.git
synced 2025-12-23 08:48:15 +00:00
Compare commits
84 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
073f60638b | ||
|
|
ce223e73ad | ||
|
|
31c6ecfad5 | ||
|
|
e784f9b212 | ||
|
|
d47b045dca | ||
|
|
78e519daa4 | ||
|
|
07e911e043 | ||
|
|
d2fb82c670 | ||
|
|
441a5b153b | ||
|
|
ec8c8d3552 | ||
|
|
850eb4cb60 | ||
|
|
f3dfb234c4 | ||
|
|
913debf140 | ||
|
|
fbda09642e | ||
|
|
38c7a45533 | ||
|
|
1186655360 | ||
|
|
c160137d43 | ||
|
|
33b60f93e8 | ||
|
|
704a647f26 | ||
|
|
a168e3691d | ||
|
|
18129baa35 | ||
|
|
0dc6738905 | ||
|
|
688b2a5191 | ||
|
|
5e5a0a473a | ||
|
|
6a97ff125c | ||
|
|
639a4b153d | ||
|
|
433829f3b8 | ||
|
|
c9f4b7c49c | ||
|
|
5fc3171a1c | ||
|
|
531bd25fc5 | ||
|
|
bcd0a1c463 | ||
|
|
5c09e09c10 | ||
|
|
f15778ed28 | ||
|
|
6ac8406e29 | ||
|
|
309719994c | ||
|
|
86edc6e34f | ||
|
|
3fa856a68c | ||
|
|
71739177a4 | ||
|
|
dedfa31b98 | ||
|
|
37ae9b4c69 | ||
|
|
2330456311 | ||
|
|
69db96b370 | ||
|
|
9eefe7336e | ||
|
|
e6579442f3 | ||
|
|
b642c99212 | ||
|
|
6b8ffd29f5 | ||
|
|
66f3e84357 | ||
|
|
4b8c619bb7 | ||
|
|
782ab95200 | ||
|
|
9c2892e265 | ||
|
|
02bd5bc5d7 | ||
|
|
bfaa324c07 | ||
|
|
1301d20400 | ||
|
|
e9eb82944e | ||
|
|
cc15110b69 | ||
|
|
d0d0ac72df | ||
|
|
a437d1abd6 | ||
|
|
3def332854 | ||
|
|
2910319fb5 | ||
|
|
483c306110 | ||
|
|
c246654906 | ||
|
|
94a83d7e78 | ||
|
|
72fd089764 | ||
|
|
e40967cf3e | ||
|
|
fc87bd518b | ||
|
|
bd319e7130 | ||
|
|
8cfb684724 | ||
|
|
1ac108fb2c | ||
|
|
150656ee42 | ||
|
|
aa2cdd4029 | ||
|
|
70d5861b94 | ||
|
|
30ff6e1f22 | ||
|
|
8dd02aad4e | ||
|
|
d31396e7a3 | ||
|
|
5d7bbeefc9 | ||
|
|
edff925df0 | ||
|
|
611475176d | ||
|
|
212508e1d9 | ||
|
|
63914aa23e | ||
|
|
4c539bc75f | ||
|
|
5ac4c0ad2e | ||
|
|
538b21fb0c | ||
|
|
8fff5e9a56 | ||
|
|
5d3beaad4f |
177 changed files with 11719 additions and 5855 deletions
2
.gitattributes
vendored
2
.gitattributes
vendored
|
|
@ -1 +1,3 @@
|
|||
* text=auto eol=lf
|
||||
quill_simple.html linguist-generated
|
||||
github_textarea.html linguist-generated
|
||||
|
|
|
|||
1
.github/workflows/just_checks.yml
vendored
1
.github/workflows/just_checks.yml
vendored
|
|
@ -39,7 +39,6 @@ jobs:
|
|||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version-file: ".node-version"
|
||||
package-manager-cache: false
|
||||
- name: Enable Corepack
|
||||
run: corepack enable
|
||||
- name: Rust Cache
|
||||
|
|
|
|||
89
Cargo.lock
generated
89
Cargo.lock
generated
|
|
@ -29,6 +29,15 @@ dependencies = [
|
|||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "alloca"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "allocator-api2"
|
||||
version = "0.2.21"
|
||||
|
|
@ -1008,10 +1017,11 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "criterion"
|
||||
version = "0.7.0"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928"
|
||||
checksum = "4d883447757bb0ee46f233e9dc22eb84d93a9508c9b868687b274fc431d886bf"
|
||||
dependencies = [
|
||||
"alloca",
|
||||
"anes",
|
||||
"cast",
|
||||
"ciborium",
|
||||
|
|
@ -1020,6 +1030,7 @@ dependencies = [
|
|||
"itertools 0.13.0",
|
||||
"num-traits",
|
||||
"oorandom",
|
||||
"page_size",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
|
@ -1029,9 +1040,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "criterion-plot"
|
||||
version = "0.6.0"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338"
|
||||
checksum = "ed943f81ea2faa8dcecbbfa50164acf95d555afec96a27871663b300e387b2e4"
|
||||
dependencies = [
|
||||
"cast",
|
||||
"itertools 0.13.0",
|
||||
|
|
@ -2658,7 +2669,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "harper-brill"
|
||||
version = "1.0.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"harper-pos-utils",
|
||||
"lazy_static",
|
||||
|
|
@ -2692,7 +2703,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "harper-comments"
|
||||
version = "1.0.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"harper-core",
|
||||
"harper-html",
|
||||
|
|
@ -2726,7 +2737,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "harper-core"
|
||||
version = "1.0.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"ammonia",
|
||||
"bitflags 2.10.0",
|
||||
|
|
@ -2764,7 +2775,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "harper-html"
|
||||
version = "1.0.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"harper-core",
|
||||
"harper-tree-sitter",
|
||||
|
|
@ -2775,7 +2786,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "harper-ink"
|
||||
version = "1.0.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"harper-core",
|
||||
"harper-tree-sitter",
|
||||
|
|
@ -2786,7 +2797,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "harper-jjdescription"
|
||||
version = "1.0.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"harper-core",
|
||||
"harper-tree-sitter",
|
||||
|
|
@ -2797,7 +2808,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "harper-literate-haskell"
|
||||
version = "1.0.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"harper-comments",
|
||||
"harper-core",
|
||||
|
|
@ -2808,7 +2819,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "harper-ls"
|
||||
version = "1.0.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
|
|
@ -2839,7 +2850,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "harper-pos-utils"
|
||||
version = "1.0.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"burn",
|
||||
"burn-ndarray",
|
||||
|
|
@ -2857,7 +2868,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "harper-python"
|
||||
version = "1.0.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"harper-core",
|
||||
"harper-tree-sitter",
|
||||
|
|
@ -2868,7 +2879,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "harper-stats"
|
||||
version = "1.0.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"harper-core",
|
||||
|
|
@ -2879,7 +2890,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "harper-tree-sitter"
|
||||
version = "1.0.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"harper-core",
|
||||
"tree-sitter",
|
||||
|
|
@ -2898,7 +2909,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "harper-typst"
|
||||
version = "1.0.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"harper-core",
|
||||
"itertools 0.14.0",
|
||||
|
|
@ -3235,9 +3246,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.12.0"
|
||||
version = "2.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f"
|
||||
checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"hashbrown 0.16.1",
|
||||
|
|
@ -3970,6 +3981,16 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "page_size"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking"
|
||||
version = "2.2.1"
|
||||
|
|
@ -5636,9 +5657,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tracing"
|
||||
version = "0.1.41"
|
||||
version = "0.1.43"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
|
||||
checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647"
|
||||
dependencies = [
|
||||
"pin-project-lite",
|
||||
"tracing-attributes",
|
||||
|
|
@ -5659,9 +5680,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tracing-attributes"
|
||||
version = "0.1.30"
|
||||
version = "0.1.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903"
|
||||
checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
@ -5670,9 +5691,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tracing-core"
|
||||
version = "0.1.34"
|
||||
version = "0.1.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678"
|
||||
checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"valuable",
|
||||
|
|
@ -5691,9 +5712,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tracing-subscriber"
|
||||
version = "0.3.20"
|
||||
version = "0.3.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5"
|
||||
checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e"
|
||||
dependencies = [
|
||||
"nu-ansi-term",
|
||||
"sharded-slab",
|
||||
|
|
@ -5730,9 +5751,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tree-sitter-bash"
|
||||
version = "0.25.0"
|
||||
version = "0.25.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "871b0606e667e98a1237ebdc1b0d7056e0aebfdc3141d12b399865d4cb6ed8a6"
|
||||
checksum = "9e5ec769279cc91b561d3df0d8a5deb26b0ad40d183127f409494d6d8fc53062"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter-language",
|
||||
|
|
@ -6120,9 +6141,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "unicode-script"
|
||||
version = "0.5.7"
|
||||
version = "0.5.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9fb421b350c9aff471779e262955939f565ec18b86c15364e6bdf0d662ca7c1f"
|
||||
checksum = "383ad40bb927465ec0ce7720e033cb4ca06912855fc35db31b5755d0de75b1ee"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-segmentation"
|
||||
|
|
@ -6209,13 +6230,13 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
|||
|
||||
[[package]]
|
||||
name = "uuid"
|
||||
version = "1.18.1"
|
||||
version = "1.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2"
|
||||
checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a"
|
||||
dependencies = [
|
||||
"getrandom 0.3.4",
|
||||
"js-sys",
|
||||
"serde",
|
||||
"serde_core",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "harper-brill"
|
||||
version = "1.0.0"
|
||||
version = "1.3.0"
|
||||
edition = "2024"
|
||||
description = "The language checker for developers."
|
||||
license = "Apache-2.0"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "harper-comments"
|
||||
version = "1.0.0"
|
||||
version = "1.3.0"
|
||||
edition = "2024"
|
||||
description = "The language checker for developers."
|
||||
license = "Apache-2.0"
|
||||
|
|
@ -13,7 +13,7 @@ harper-html = { path = "../harper-html", version = "1.0.0" }
|
|||
harper-tree-sitter = { path = "../harper-tree-sitter", version = "1.0.0" }
|
||||
itertools = "0.14.0"
|
||||
tree-sitter = "0.25.10"
|
||||
tree-sitter-bash = "0.25.0"
|
||||
tree-sitter-bash = "0.25.1"
|
||||
tree-sitter-c = "0.24.1"
|
||||
tree-sitter-cmake = "0.7.1"
|
||||
tree-sitter-cpp = "0.23.4"
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ impl CommentParser {
|
|||
"dart" => harper_tree_sitter_dart::LANGUAGE,
|
||||
"go" => tree_sitter_go::LANGUAGE,
|
||||
"haskell" => tree_sitter_haskell::LANGUAGE,
|
||||
"daml" => tree_sitter_haskell::LANGUAGE,
|
||||
"java" => tree_sitter_java::LANGUAGE,
|
||||
"javascript" => tree_sitter_javascript::LANGUAGE,
|
||||
"javascriptreact" => tree_sitter_typescript::LANGUAGE_TSX,
|
||||
|
|
@ -89,6 +90,7 @@ impl CommentParser {
|
|||
"dart" => "dart",
|
||||
"go" => "go",
|
||||
"hs" => "haskell",
|
||||
"daml" => "daml",
|
||||
"java" => "java",
|
||||
"js" => "javascript",
|
||||
"jsx" => "javascriptreact",
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ object hello extends ScalaModule {
|
|||
ivy"com.lihaoyi::mainargs:0.6.2" // for CLI argument parsing
|
||||
)
|
||||
|
||||
// Define an test sub-module using a test framework.
|
||||
// Define an test submodule using a test framework.
|
||||
object test extends ScalaTests {
|
||||
def testFramework = "utest.runner.Framework"
|
||||
def ivyDeps = Agg(
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "harper-core"
|
||||
version = "1.0.0"
|
||||
version = "1.3.0"
|
||||
edition = "2024"
|
||||
description = "The language checker for developers."
|
||||
license = "Apache-2.0"
|
||||
|
|
@ -22,7 +22,7 @@ serde_json = "1.0.145"
|
|||
smallvec = { version = "1.15.1", features = ["serde"] }
|
||||
thiserror = "2.0.17"
|
||||
unicode-blocks = "0.1.9"
|
||||
unicode-script = "0.5.7"
|
||||
unicode-script = "0.5.8"
|
||||
unicode-width = "0.2.2"
|
||||
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
||||
cached = "0.56.0"
|
||||
|
|
@ -36,7 +36,7 @@ bitflags = { version = "2.10.0", features = ["serde"] }
|
|||
trie-rs = "0.4.2"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.7.0", default-features = false }
|
||||
criterion = { version = "0.8.1", default-features = false }
|
||||
rand = "0.8.5"
|
||||
quickcheck = "1.0.3"
|
||||
quickcheck_macros = "1.1.0"
|
||||
|
|
|
|||
|
|
@ -6,6 +6,31 @@
|
|||
Feel free to use `harper-core` in your projects.
|
||||
If you run into issues, create a pull request.
|
||||
|
||||
## Example
|
||||
|
||||
Here's what a full end-to-end linting pipeline could look like using `harper-core`.
|
||||
|
||||
```rust
|
||||
use harper_core::linting::{LintGroup, Linter};
|
||||
use harper_core::parsers::PlainEnglish;
|
||||
use harper_core::spell::FstDictionary;
|
||||
use harper_core::{Dialect, Document};
|
||||
|
||||
let text = "This is an test.";
|
||||
let parser = PlainEnglish;
|
||||
|
||||
let document = Document::new_curated(text, &parser);
|
||||
|
||||
let dict = FstDictionary::curated();
|
||||
let mut linter = LintGroup::new_curated(dict, Dialect::American);
|
||||
|
||||
let lints = linter.lint(&document);
|
||||
|
||||
for lint in lints {
|
||||
println!("{:?}", lint);
|
||||
}
|
||||
```
|
||||
|
||||
## Features
|
||||
|
||||
`concurrent`: Whether to use thread-safe primitives (`Arc` vs `Rc`). Disabled by default.
|
||||
|
|
|
|||
|
|
@ -997,6 +997,14 @@
|
|||
"metadata": {
|
||||
"//": "not yet implemented"
|
||||
}
|
||||
},
|
||||
"(": {
|
||||
"#": "prefix property",
|
||||
"metadata": {
|
||||
"affix": {
|
||||
"is_prefix": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
162
harper-core/irregular_nouns.json
Normal file
162
harper-core/irregular_nouns.json
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
[
|
||||
"// comments can appear in the line before an entry",
|
||||
"// or in place of an entry",
|
||||
["child", "children"],
|
||||
["foot", "feet"],
|
||||
["goose", "geese"],
|
||||
["man", "men"],
|
||||
["mouse", "mice"],
|
||||
["ox", "oxen"],
|
||||
["person", "people"],
|
||||
["seraph", "seraphim"],
|
||||
["woman", "women"],
|
||||
["addendum", "addenda"],
|
||||
["aircraft", "aircraft"],
|
||||
["aircraftman", "aircraftmen"],
|
||||
["aircraftwoman", "aircraftwomen"],
|
||||
["airman", "airmen"],
|
||||
["alderman", "aldermen"],
|
||||
["alga", "algae"],
|
||||
["alveolus", "alveoli"],
|
||||
["anchorman", "anchormen"],
|
||||
["anchorwoman", "anchorwomen"],
|
||||
["atrium", "atria"],
|
||||
["axis", "axes"],
|
||||
["bacillus", "bacilli"],
|
||||
["bacterium", "bacteria"],
|
||||
["bandsman", "bandsmen"],
|
||||
["bargeman", "bargemen"],
|
||||
["bellman", "bellmen"],
|
||||
["biceps", "biceps"],
|
||||
["boatman", "boatmen"],
|
||||
["bronchus", "bronchi"],
|
||||
["businesswoman", "businesswomen"],
|
||||
["cactus", "cacti"],
|
||||
["cameraperson", "camerapeople"],
|
||||
["candelabrum", "candelabra"],
|
||||
["catharsis", "catharses"],
|
||||
["chairman", "chairmen"],
|
||||
["chairwoman", "chairwomen"],
|
||||
["churchwoman", "churchwomen"],
|
||||
["clansman", "clansmen"],
|
||||
["clanswoman", "clanswomen"],
|
||||
["committeeman", "committeemen"],
|
||||
["committeewoman", "committeewomen"],
|
||||
["continuum", "continua"],
|
||||
["corpus", "corpora"],
|
||||
["craftsman", "craftsmen"],
|
||||
["craftswoman", "craftswomen"],
|
||||
["crisis", "crises"],
|
||||
["cyclops", "cyclopes"],
|
||||
["datum", "data"],
|
||||
["diaeresis", "diaereses"],
|
||||
["diagnosis", "diagnoses"],
|
||||
["dominatrix", "dominatrices"],
|
||||
["draughtsman", "draughtsmen"],
|
||||
["draughtswoman", "draughtswomen"],
|
||||
["effluvium", "effluvia"],
|
||||
["emphasis", "emphases"],
|
||||
["esophagus", "esophagi"],
|
||||
["extremum", "extrema"],
|
||||
["fish", "fish"],
|
||||
["footman", "footmen"],
|
||||
["formula", "formulae"],
|
||||
["forum", "fora"],
|
||||
["freeman", "freemen"],
|
||||
["frontiersman", "frontiersmen"],
|
||||
["frontierswoman", "frontierswomen"],
|
||||
["garbageman", "garbagemen"],
|
||||
["genesis", "geneses"],
|
||||
["genie", "genii"],
|
||||
["genius", "genii"],
|
||||
["genus", "genera"],
|
||||
["glissando", "glissandi"],
|
||||
["graffito", "graffiti"],
|
||||
["grandchild", "grandchildren"],
|
||||
["handyman", "handymen"],
|
||||
["hitman", "hitmen"],
|
||||
["houseman", "housemen"],
|
||||
["iceman", "icemen"],
|
||||
["ilium", "ilia"],
|
||||
["index", "indices"],
|
||||
["intermezzo", "intermezzi"],
|
||||
["journeyman", "journeymen"],
|
||||
["labium", "labia"],
|
||||
["lamina", "laminae"],
|
||||
["laundrywoman", "laundrywomen"],
|
||||
["laywoman", "laywomen"],
|
||||
["linesman", "linesmen"],
|
||||
["lira", " lire"],
|
||||
["longshoreman", "longshoremen"],
|
||||
["louse", "lice"],
|
||||
["madman", "madmen"],
|
||||
["mailman", "mailmen"],
|
||||
["memorandum", "memoranda"],
|
||||
["metathesis", "metatheses"],
|
||||
["minimum", "minima"],
|
||||
["mitosis", "mitoses"],
|
||||
["motorman", "motormen"],
|
||||
["muscleman", "musclemen"],
|
||||
["nemesis", "nemeses"],
|
||||
["nightwatchman", "nightwatchmen"],
|
||||
["oarsman", "oarsmen"],
|
||||
["oarswoman", "oarswomen"],
|
||||
["oasis", "oases"],
|
||||
["ombudsman", "ombudsmen"],
|
||||
["optimum", "optima"],
|
||||
["palazzo", "palazzi"],
|
||||
["papyrus", "papyri"],
|
||||
["parenthesis", "parentheses"],
|
||||
["patina", "patinae"],
|
||||
["patrolman", "patrolmen"],
|
||||
["pericardium", "pericardia"],
|
||||
["periphrasis", "periphrases"],
|
||||
["pharynx", "pharynges"],
|
||||
["phenomenon", "phenomena"],
|
||||
["plainclothesman", "plainclothesmen"],
|
||||
["pneumococcus", "pneumococci"],
|
||||
["pressman", "pressmen"],
|
||||
["prosthesis", "protheses"],
|
||||
["quantum", "quanta"],
|
||||
["radius", "radii"],
|
||||
["radix", "radices"],
|
||||
["repairman", "repairmen"],
|
||||
["salesman", "salesmen"],
|
||||
["saleswoman", "saleswomen"],
|
||||
["sandman", "sandmen"],
|
||||
["schema", "schemata"],
|
||||
["sheep", "sheep"],
|
||||
["shoreman", "shoremen"],
|
||||
["signore", "signori"],
|
||||
["simulacrum", "simulacra"],
|
||||
["solarium", "solaria"],
|
||||
["spokesman", "spokesmen"],
|
||||
["spokesperson", "spokespeople"],
|
||||
["spokeswoman", "spokeswomen"],
|
||||
["statesman", "statesmen"],
|
||||
["stateswoman", "stateswomen"],
|
||||
["steersman", "steersmen"],
|
||||
["stratum", "strata"],
|
||||
["streptococcus", "streptococci"],
|
||||
["succubus", "succubi"],
|
||||
["symbiosis", "symbioses"],
|
||||
["tarsus", "tarsi"],
|
||||
["taxon", "taxa"],
|
||||
["testatrix", "testatrices"],
|
||||
["testis", "testes"],
|
||||
["thesis", "theses"],
|
||||
["thrombosis", "thromboses"],
|
||||
["tooth", "teeth"],
|
||||
["townsman", "townsmen"],
|
||||
["townswoman", "townswomen"],
|
||||
["tradesman", "tradesmen"],
|
||||
["tradeswoman", "tradeswomen"],
|
||||
["uterus", "uteri"],
|
||||
["vertebra", "vertebrae"],
|
||||
["vertex", "vertices"],
|
||||
["vivarium", "vivaria"],
|
||||
["washerwoman", "washerwomen"],
|
||||
["woodlouse", "woodlice"],
|
||||
["workingwoman", "workingwomen"],
|
||||
["workman", "workmen"]
|
||||
]
|
||||
127
harper-core/irregular_verbs.json
Normal file
127
harper-core/irregular_verbs.json
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
[
|
||||
"// comments can appear in the line before an entry",
|
||||
"// or in place of an entry",
|
||||
["arise", "arose", "arisen"],
|
||||
["awake", "awoke", "awoken"],
|
||||
"// be/am/are/is -- was/were -- been",
|
||||
["become", "became", "become"],
|
||||
["begin", "began", "begun"],
|
||||
["bend", "bent", "bent"],
|
||||
["bet", "bet", "bet"],
|
||||
["bid", "bade", "bidden"],
|
||||
["bind", "bound", "bound"],
|
||||
["bite", "bit", "bitten"],
|
||||
["bleed", "bled", "bled"],
|
||||
["blow", "blew", "blown"],
|
||||
["break", "broke", "broken"],
|
||||
["breed", "bred", "bred"],
|
||||
["bring", "brought", "brought"],
|
||||
["build", "built", "built"],
|
||||
["burst", "burst", "burst"],
|
||||
["buy", "bought", "bought"],
|
||||
["catch", "caught", "caught"],
|
||||
["choose", "chose", "chosen"],
|
||||
["come", "came", "come"],
|
||||
["cost", "cost", "cost"],
|
||||
["cut", "cut", "cut"],
|
||||
["dive", "dove", "dove"],
|
||||
["do", "did", "done"],
|
||||
["drink", "drank", "drunk"],
|
||||
["drive", "drove", "driven"],
|
||||
["eat", "ate", "eaten"],
|
||||
["fall", "fell", "fallen"],
|
||||
["feed", "fed", "fed"],
|
||||
["feel", "felt", "felt"],
|
||||
["fight", "fought", "fought"],
|
||||
["find", "found", "found"],
|
||||
["fly", "flew", "flown"],
|
||||
["forget", "forgot", "forgotten"],
|
||||
["forgo", "forwent", "forgone"],
|
||||
["freeze", "froze", "frozen"],
|
||||
"// get -- got -- gotten",
|
||||
["get", "got", "got"],
|
||||
["give", "gave", "given"],
|
||||
["go", "went", "gone"],
|
||||
["grow", "grew", "grown"],
|
||||
["have", "had", "had"],
|
||||
["hear", "heard", "heard"],
|
||||
["hit", "hit", "hit"],
|
||||
["hold", "held", "held"],
|
||||
["hurt", "hurt", "hurt"],
|
||||
["input", "input", "input"],
|
||||
["keep", "kept", "kept"],
|
||||
["know", "knew", "known"],
|
||||
["lay", "laid", "lain"],
|
||||
["lead", "led", "led"],
|
||||
["light", "lit", "lit"],
|
||||
["lose", "lost", "lost"],
|
||||
["make", "made", "made"],
|
||||
["mistake", "mistook", "mistaken"],
|
||||
["output", "output", "output"],
|
||||
["overtake", "overtook", "overtaken"],
|
||||
["overthrow", "overthrew", "overthrown"],
|
||||
["overwrite", "overwrote", "overwritten"],
|
||||
["partake", "partook", "partaken"],
|
||||
["pay", "paid", "paid"],
|
||||
["put", "put", "put"],
|
||||
["read", "read", "read"],
|
||||
["redo", "redid", "redone"],
|
||||
["remake", "remade", "remade"],
|
||||
["reread", "reread", "reread"],
|
||||
["reset", "reset", "reset"],
|
||||
["ride", "rode", "ridden"],
|
||||
["ring", "rang", "rung"],
|
||||
["rise", "rose", "risen"],
|
||||
["run", "ran", "run"],
|
||||
["see", "saw", "seen"],
|
||||
["sell", "sold", "sold"],
|
||||
["send", "sent", "sent"],
|
||||
["set", "set", "set"],
|
||||
["shake", "shook", "shaken"],
|
||||
["shed", "shed", "shed"],
|
||||
["shine", "shone", "shone"],
|
||||
["shoe", "shod", "shod"],
|
||||
["shoot", "shot", "shot"],
|
||||
["show", "showed", "shown"],
|
||||
["shrink", "shrank", "shrunk"],
|
||||
["shut", "shut", "shut"],
|
||||
["sing", "sang", "sung"],
|
||||
"// sink -- sank -- sunken??",
|
||||
["sink", "sank", "sunk"],
|
||||
["sit", "sat", "sat"],
|
||||
["slay", "slew", "slain"],
|
||||
["sleep", "slept", "slept"],
|
||||
["slide", "slid", "slid"],
|
||||
["slit", "slit", "slit"],
|
||||
"// sneak -- sneaked/snuck -- sneaked/snuck",
|
||||
["speak", "spoke", "spoken"],
|
||||
["spin", "spun", "spun"],
|
||||
["spit", "spat", "spat"],
|
||||
["split", "split", "split"],
|
||||
["spread", "spread", "spread"],
|
||||
["spring", "sprang", "sprung"],
|
||||
["stand", "stood", "stood"],
|
||||
["steal", "stole", "stolen"],
|
||||
["stick", "stuck", "stuck"],
|
||||
["sting", "stung", "stung"],
|
||||
["stink", "stank", "stunk"],
|
||||
["stride", "strode", "stridden"],
|
||||
["strike", "struck", "stricken"],
|
||||
["string", "strung", "strung"],
|
||||
["sew", "sewed", "sewn"],
|
||||
["swear", "swore", "sworn"],
|
||||
["swim", "swam", "swum"],
|
||||
["swing", "swung", "swung"],
|
||||
["take", "took", "taken"],
|
||||
["teach", "taught", "taught"],
|
||||
["tear", "tore", "torn"],
|
||||
["think", "thought", "thought"],
|
||||
["throw", "threw", "thrown"],
|
||||
["tread", "trod", "trodden"],
|
||||
["undo", "undid", "undone"],
|
||||
["wake", "woke", "woken"],
|
||||
["wear", "wore", "worn"],
|
||||
["weave", "wove", "woven"],
|
||||
["wind", "wound", "wound"],
|
||||
["write", "wrote", "written"]
|
||||
]
|
||||
|
|
@ -3,7 +3,13 @@ use unicode_width::UnicodeWidthChar;
|
|||
|
||||
use crate::Punctuation;
|
||||
|
||||
pub trait CharExt {
|
||||
mod private {
|
||||
pub trait Sealed {}
|
||||
|
||||
impl Sealed for char {}
|
||||
}
|
||||
|
||||
pub trait CharExt: private::Sealed {
|
||||
fn is_cjk(&self) -> bool;
|
||||
/// Whether a character can be a component of an English word.
|
||||
fn is_english_lingual(&self) -> bool;
|
||||
|
|
|
|||
|
|
@ -7,8 +7,14 @@ use smallvec::SmallVec;
|
|||
/// Most English words are fewer than 12 characters.
|
||||
pub type CharString = SmallVec<[char; 16]>;
|
||||
|
||||
mod private {
|
||||
pub trait Sealed {}
|
||||
|
||||
impl Sealed for [char] {}
|
||||
}
|
||||
|
||||
/// Extensions to character sequences that make them easier to wrangle.
|
||||
pub trait CharStringExt {
|
||||
pub trait CharStringExt: private::Sealed {
|
||||
/// Convert all characters to lowercase, returning a new owned vector if any changes were made.
|
||||
fn to_lower(&'_ self) -> Cow<'_, [char]>;
|
||||
|
||||
|
|
@ -26,6 +32,10 @@ pub trait CharStringExt {
|
|||
/// Only normalizes the left side to lowercase and avoids allocations.
|
||||
fn eq_ignore_ascii_case_str(&self, other: &str) -> bool;
|
||||
|
||||
/// Case-insensitive comparison with any of a list of string slices, assuming the right-hand side is lowercase ASCII.
|
||||
/// Only normalizes the left side to lowercase and avoids allocations.
|
||||
fn eq_any_ignore_ascii_case_str(&self, others: &[&str]) -> bool;
|
||||
|
||||
/// Case-insensitive comparison with any of a list of character slices, assuming the right-hand side is lowercase ASCII.
|
||||
/// Only normalizes the left side to lowercase and avoids allocations.
|
||||
fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool;
|
||||
|
|
@ -46,6 +56,10 @@ pub trait CharStringExt {
|
|||
/// The suffix is assumed to be lowercase.
|
||||
fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool;
|
||||
|
||||
/// Case-insensitive check if the string ends with any of the given ASCII suffixes.
|
||||
/// The suffixes are assumed to be lowercase.
|
||||
fn ends_with_any_ignore_ascii_case_chars(&self, suffixes: &[&[char]]) -> bool;
|
||||
|
||||
/// Check if the string contains any vowels
|
||||
fn contains_vowel(&self) -> bool;
|
||||
}
|
||||
|
|
@ -99,6 +113,10 @@ impl CharStringExt for [char] {
|
|||
.all(|(a, b)| a.to_ascii_lowercase() == *b)
|
||||
}
|
||||
|
||||
fn eq_any_ignore_ascii_case_str(&self, others: &[&str]) -> bool {
|
||||
others.iter().any(|str| self.eq_ignore_ascii_case_str(str))
|
||||
}
|
||||
|
||||
fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool {
|
||||
others
|
||||
.iter()
|
||||
|
|
@ -148,6 +166,12 @@ impl CharStringExt for [char] {
|
|||
.all(|(a, b)| a.to_ascii_lowercase() == *b)
|
||||
}
|
||||
|
||||
fn ends_with_any_ignore_ascii_case_chars(&self, suffixes: &[&[char]]) -> bool {
|
||||
suffixes
|
||||
.iter()
|
||||
.any(|suffix| self.ends_with_ignore_ascii_case_chars(suffix))
|
||||
}
|
||||
|
||||
fn contains_vowel(&self) -> bool {
|
||||
self.iter().any(|c| c.is_vowel())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,12 +18,20 @@ use crate::{Document, TokenKind, TokenStringExt};
|
|||
/// having their own lexeme, but "Ivy" and "ivy" sharing the same lexeme.
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)]
|
||||
pub struct DictWordMetadata {
|
||||
/// The main parts of speech which have extra data.
|
||||
pub noun: Option<NounData>,
|
||||
pub pronoun: Option<PronounData>,
|
||||
pub verb: Option<VerbData>,
|
||||
pub adjective: Option<AdjectiveData>,
|
||||
pub adverb: Option<AdverbData>,
|
||||
pub conjunction: Option<ConjunctionData>,
|
||||
pub determiner: Option<DeterminerData>,
|
||||
pub affix: Option<AffixData>,
|
||||
/// Parts of speech which don't have extra data.
|
||||
/// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
|
||||
#[serde(default = "default_false")]
|
||||
pub preposition: bool,
|
||||
/// Whether the word is an offensive word.
|
||||
pub swear: Option<bool>,
|
||||
/// The dialects this word belongs to.
|
||||
/// If no dialects are defined, it can be assumed that the word is
|
||||
|
|
@ -33,19 +41,17 @@ pub struct DictWordMetadata {
|
|||
/// Orthographic information: letter case, spaces, hyphens, etc.
|
||||
#[serde(default = "OrthFlags::empty")]
|
||||
pub orth_info: OrthFlags,
|
||||
/// Whether the word is a [determiner](https://en.wikipedia.org/wiki/English_determiners).
|
||||
pub determiner: Option<DeterminerData>,
|
||||
/// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
|
||||
#[serde(default = "default_false")]
|
||||
pub preposition: bool,
|
||||
/// Whether the word is considered especially common.
|
||||
#[serde(default = "default_false")]
|
||||
pub common: bool,
|
||||
#[serde(default = "default_none")]
|
||||
pub derived_from: Option<WordId>,
|
||||
/// Generated by a chunker
|
||||
/// Generated by a chunker. Declares whether the word is a member of a nominal phrase. Using
|
||||
/// this should be preferred over the similarly named `Pattern`.
|
||||
///
|
||||
/// For more details, see [the announcement blog post](https://elijahpotter.dev/articles/training_a_chunker_with_burn).
|
||||
pub np_member: Option<bool>,
|
||||
/// Generated by a POS tagger
|
||||
/// Generated by a POS tagger. Declares what it inferred the word's part of speech to be.
|
||||
pub pos_tag: Option<UPOS>,
|
||||
}
|
||||
|
||||
|
|
@ -186,11 +192,12 @@ impl DictWordMetadata {
|
|||
adjective: merge!(self.adjective, other.adjective),
|
||||
adverb: merge!(self.adverb, other.adverb),
|
||||
conjunction: merge!(self.conjunction, other.conjunction),
|
||||
determiner: merge!(self.determiner, other.determiner),
|
||||
affix: merge!(self.affix, other.affix),
|
||||
preposition: self.preposition || other.preposition,
|
||||
dialects: self.dialects | other.dialects,
|
||||
orth_info: self.orth_info | other.orth_info,
|
||||
swear: self.swear.or(other.swear),
|
||||
determiner: merge!(self.determiner, other.determiner),
|
||||
preposition: self.preposition || other.preposition,
|
||||
common: self.common || other.common,
|
||||
derived_from: self.derived_from.or(other.derived_from),
|
||||
pos_tag: self.pos_tag.or(other.pos_tag),
|
||||
|
|
@ -231,6 +238,7 @@ impl DictWordMetadata {
|
|||
self.adverb = None;
|
||||
self.conjunction = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
}
|
||||
PROPN => {
|
||||
|
|
@ -256,6 +264,7 @@ impl DictWordMetadata {
|
|||
self.adverb = None;
|
||||
self.conjunction = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
}
|
||||
PRON => {
|
||||
|
|
@ -269,6 +278,7 @@ impl DictWordMetadata {
|
|||
self.adverb = None;
|
||||
self.conjunction = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
}
|
||||
VERB => {
|
||||
|
|
@ -290,6 +300,7 @@ impl DictWordMetadata {
|
|||
self.adverb = None;
|
||||
self.conjunction = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
}
|
||||
AUX => {
|
||||
|
|
@ -311,6 +322,7 @@ impl DictWordMetadata {
|
|||
self.adverb = None;
|
||||
self.conjunction = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
}
|
||||
ADJ => {
|
||||
|
|
@ -324,6 +336,7 @@ impl DictWordMetadata {
|
|||
self.adverb = None;
|
||||
self.conjunction = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
}
|
||||
ADV => {
|
||||
|
|
@ -337,6 +350,7 @@ impl DictWordMetadata {
|
|||
self.adjective = None;
|
||||
self.conjunction = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
}
|
||||
ADP => {
|
||||
|
|
@ -347,6 +361,7 @@ impl DictWordMetadata {
|
|||
self.adverb = None;
|
||||
self.conjunction = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = true;
|
||||
}
|
||||
DET => {
|
||||
|
|
@ -356,6 +371,7 @@ impl DictWordMetadata {
|
|||
self.adjective = None;
|
||||
self.adverb = None;
|
||||
self.conjunction = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
self.determiner = Some(DeterminerData::default());
|
||||
}
|
||||
|
|
@ -370,6 +386,7 @@ impl DictWordMetadata {
|
|||
self.adjective = None;
|
||||
self.adverb = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
}
|
||||
_ => {}
|
||||
|
|
@ -955,6 +972,22 @@ impl ConjunctionData {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
|
||||
pub struct AffixData {
|
||||
pub is_prefix: Option<bool>,
|
||||
pub is_suffix: Option<bool>,
|
||||
}
|
||||
|
||||
impl AffixData {
|
||||
/// Produce a copy of `self` with the known properties of `other` set.
|
||||
pub fn or(&self, _other: &Self) -> Self {
|
||||
Self {
|
||||
is_prefix: self.is_prefix.or(_other.is_prefix),
|
||||
is_suffix: self.is_suffix.or(_other.is_suffix),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A regional dialect.
|
||||
///
|
||||
/// Note: these have bit-shifted values so that they can ergonomically integrate with
|
||||
|
|
|
|||
|
|
@ -918,6 +918,7 @@ impl TokenStringExt for Document {
|
|||
create_fns_on_doc!(verb);
|
||||
create_fns_on_doc!(word);
|
||||
create_fns_on_doc!(word_like);
|
||||
create_fns_on_doc!(heading_start);
|
||||
|
||||
fn first_sentence_word(&self) -> Option<&Token> {
|
||||
self.tokens.first_sentence_word()
|
||||
|
|
@ -947,6 +948,10 @@ impl TokenStringExt for Document {
|
|||
self.tokens.iter_paragraphs()
|
||||
}
|
||||
|
||||
fn iter_headings(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
|
||||
self.tokens.iter_headings()
|
||||
}
|
||||
|
||||
fn iter_sentences(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
|
||||
self.tokens.iter_sentences()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -42,17 +42,13 @@ impl FixedPhrase {
|
|||
phrase = phrase.then_whitespace();
|
||||
}
|
||||
TokenKind::Punctuation(p) => {
|
||||
phrase = phrase.then(move |t: &Token, _source: &[char]| {
|
||||
t.kind.as_punctuation().cloned() == Some(p)
|
||||
})
|
||||
phrase = phrase
|
||||
.then_kind_where(move |kind| kind.as_punctuation().cloned() == Some(p));
|
||||
}
|
||||
TokenKind::ParagraphBreak => {
|
||||
phrase = phrase.then_whitespace();
|
||||
}
|
||||
TokenKind::Number(n) => {
|
||||
phrase = phrase
|
||||
.then(move |tok: &Token, _source: &[char]| tok.kind == TokenKind::Number(n))
|
||||
}
|
||||
TokenKind::Number(_) => phrase = phrase.then_kind_where(|kind| kind.is_number()),
|
||||
_ => panic!("Fell out of expected document formats."),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -155,6 +155,7 @@ where
|
|||
pub trait OwnedExprExt {
|
||||
fn or(self, other: impl Expr + 'static) -> FirstMatchOf;
|
||||
fn and(self, other: impl Expr + 'static) -> All;
|
||||
fn and_not(self, other: impl Expr + 'static) -> All;
|
||||
fn or_longest(self, other: impl Expr + 'static) -> LongestMatchOf;
|
||||
}
|
||||
|
||||
|
|
@ -172,6 +173,11 @@ where
|
|||
All::new(vec![Box::new(self), Box::new(other)])
|
||||
}
|
||||
|
||||
/// Returns an expression that matches only if the current one matches and the expression contained in `other` does not.
|
||||
fn and_not(self, other: impl Expr + 'static) -> All {
|
||||
self.and(UnlessStep::new(other, |_tok: &Token, _src: &[char]| true))
|
||||
}
|
||||
|
||||
/// Returns an expression that matches the longest of the current one or the expression contained in `other`.
|
||||
///
|
||||
/// If you don't need the longest match, prefer using the short-circuiting [`Self::or()`] instead.
|
||||
|
|
|
|||
|
|
@ -19,8 +19,8 @@ macro_rules! gen_then_from_is {
|
|||
paste! {
|
||||
#[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
|
||||
pub fn [< then_$quality >] (self) -> Self{
|
||||
self.then(|tok: &Token, _source: &[char]| {
|
||||
tok.kind.[< is_$quality >]()
|
||||
self.then_kind_where(|kind| {
|
||||
kind.[< is_$quality >]()
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -40,12 +40,8 @@ macro_rules! gen_then_from_is {
|
|||
|
||||
#[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns false.")]
|
||||
pub fn [< then_anything_but_$quality >] (self) -> Self{
|
||||
self.then(|tok: &Token, _source: &[char]| {
|
||||
if tok.kind.[< is_$quality >](){
|
||||
false
|
||||
}else{
|
||||
true
|
||||
}
|
||||
self.then_kind_where(|kind| {
|
||||
!kind.[< is_$quality >]()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -84,6 +80,13 @@ impl Expr for SequenceExpr {
|
|||
impl SequenceExpr {
|
||||
// Constructor methods
|
||||
|
||||
// Single token methods
|
||||
|
||||
/// Construct a new sequence with an [`AnyPattern`] at the beginning of the operation list.
|
||||
pub fn anything() -> Self {
|
||||
Self::default().then_anything()
|
||||
}
|
||||
|
||||
// Single word token methods
|
||||
|
||||
/// Construct a new sequence with a [`Word`] at the beginning of the operation list.
|
||||
|
|
@ -170,9 +173,9 @@ impl SequenceExpr {
|
|||
self.then(WordSet::new(words))
|
||||
}
|
||||
|
||||
/// Matches any token whose `Kind` exactly matches.
|
||||
pub fn then_strict(self, kind: TokenKind) -> Self {
|
||||
self.then(move |tok: &Token, _source: &[char]| tok.kind == kind)
|
||||
/// Shorthand for [`Self::then_word_set`].
|
||||
pub fn t_set(self, words: &'static [&'static str]) -> Self {
|
||||
self.then_word_set(words)
|
||||
}
|
||||
|
||||
/// Match against one or more whitespace tokens.
|
||||
|
|
@ -180,6 +183,11 @@ impl SequenceExpr {
|
|||
self.then(WhitespacePattern)
|
||||
}
|
||||
|
||||
/// Shorthand for [`Self::then_whitespace`].
|
||||
pub fn t_ws(self) -> Self {
|
||||
self.then_whitespace()
|
||||
}
|
||||
|
||||
/// Match against one or more whitespace tokens.
|
||||
pub fn then_whitespace_or_hyphen(self) -> Self {
|
||||
self.then(WhitespacePattern.or(|tok: &Token, _: &[char]| tok.kind.is_hyphen()))
|
||||
|
|
@ -190,11 +198,6 @@ impl SequenceExpr {
|
|||
self.then_whitespace_or_hyphen()
|
||||
}
|
||||
|
||||
/// Shorthand for [`Self::then_whitespace`].
|
||||
pub fn t_ws(self) -> Self {
|
||||
self.then_whitespace()
|
||||
}
|
||||
|
||||
pub fn then_one_or_more(self, expr: impl Expr + 'static) -> Self {
|
||||
self.then(Repeating::new(Box::new(expr), 1))
|
||||
}
|
||||
|
|
@ -229,7 +232,7 @@ impl SequenceExpr {
|
|||
|
||||
/// Matches any word.
|
||||
pub fn then_any_word(self) -> Self {
|
||||
self.then(|tok: &Token, _source: &[char]| tok.kind.is_word())
|
||||
self.then_kind_where(|kind| kind.is_word())
|
||||
}
|
||||
|
||||
/// Match examples of `word` that have any capitalization.
|
||||
|
|
@ -266,6 +269,23 @@ impl SequenceExpr {
|
|||
|
||||
// One kind
|
||||
|
||||
/// Matches any token whose `Kind` exactly matches.
|
||||
pub fn then_kind(self, kind: TokenKind) -> Self {
|
||||
self.then_kind_where(move |k| kind == *k)
|
||||
}
|
||||
|
||||
/// Matches a token where the provided closure returns true for the token's kind.
|
||||
pub fn then_kind_where<F>(mut self, predicate: F) -> Self
|
||||
where
|
||||
F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
|
||||
{
|
||||
self.exprs
|
||||
.push(Box::new(move |tok: &Token, _source: &[char]| {
|
||||
predicate(&tok.kind)
|
||||
}));
|
||||
self
|
||||
}
|
||||
|
||||
/// Match a token of a given kind which is not in the list of words.
|
||||
pub fn then_kind_except<F>(self, pred_is: F, ex: &'static [&'static str]) -> Self
|
||||
where
|
||||
|
|
@ -288,7 +308,7 @@ impl SequenceExpr {
|
|||
F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
|
||||
F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
|
||||
{
|
||||
self.then(move |tok: &Token, _source: &[char]| pred_is_1(&tok.kind) && pred_is_2(&tok.kind))
|
||||
self.then_kind_where(move |k| pred_is_1(k) && pred_is_2(k))
|
||||
}
|
||||
|
||||
/// Match a token where either of the two token kind predicates returns true.
|
||||
|
|
@ -298,7 +318,17 @@ impl SequenceExpr {
|
|||
F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
|
||||
F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
|
||||
{
|
||||
self.then(move |tok: &Token, _source: &[char]| pred_is_1(&tok.kind) || pred_is_2(&tok.kind))
|
||||
self.then_kind_where(move |k| pred_is_1(k) || pred_is_2(k))
|
||||
}
|
||||
|
||||
/// Match a token where neither of the two token kind predicates returns true.
|
||||
/// For instance, a word that can't be a verb or a noun.
|
||||
pub fn then_kind_neither<F1, F2>(self, pred_isnt_1: F1, pred_isnt_2: F2) -> Self
|
||||
where
|
||||
F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
|
||||
F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
|
||||
{
|
||||
self.then_kind_where(move |k| !pred_isnt_1(k) && !pred_isnt_2(k))
|
||||
}
|
||||
|
||||
/// Match a token where the first token kind predicate returns true and the second returns false.
|
||||
|
|
@ -308,7 +338,7 @@ impl SequenceExpr {
|
|||
F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
|
||||
F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
|
||||
{
|
||||
self.then(move |tok: &Token, _source: &[char]| pred_is(&tok.kind) && !pred_not(&tok.kind))
|
||||
self.then_kind_where(move |k| pred_is(k) && !pred_not(k))
|
||||
}
|
||||
|
||||
/// Match a token where the first token kind predicate returns true and the second returns false,
|
||||
|
|
@ -332,6 +362,42 @@ impl SequenceExpr {
|
|||
})
|
||||
}
|
||||
|
||||
/// Match a token where the first token kind predicate returns true and all of the second return false.
|
||||
/// For instance, a word that can be a verb but not a noun or an adjective.
|
||||
pub fn then_kind_is_but_isnt_any_of<F1, F2>(
|
||||
self,
|
||||
pred_is: F1,
|
||||
preds_isnt: &'static [F2],
|
||||
) -> Self
|
||||
where
|
||||
F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
|
||||
F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
|
||||
{
|
||||
self.then_kind_where(move |k| pred_is(k) && !preds_isnt.iter().any(|pred| pred(k)))
|
||||
}
|
||||
|
||||
/// Match a token where the first token kind predicate returns true and all of the second return false,
|
||||
/// and the token is not in the list of exceptions.
|
||||
/// For instance, an adjective that isn't also a verb or adverb or the word "likely".
|
||||
pub fn then_kind_is_but_isnt_any_of_except<F1, F2>(
|
||||
self,
|
||||
pred_is: F1,
|
||||
preds_isnt: &'static [F2],
|
||||
ex: &'static [&'static str],
|
||||
) -> Self
|
||||
where
|
||||
F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
|
||||
F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
|
||||
{
|
||||
self.then(move |tok: &Token, src: &[char]| {
|
||||
pred_is(&tok.kind)
|
||||
&& !preds_isnt.iter().any(|pred| pred(&tok.kind))
|
||||
&& !ex
|
||||
.iter()
|
||||
.any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
|
||||
})
|
||||
}
|
||||
|
||||
gen_then_from_is!(sentence_terminator);
|
||||
// More than two kinds
|
||||
|
||||
|
|
@ -341,7 +407,16 @@ impl SequenceExpr {
|
|||
where
|
||||
F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
|
||||
{
|
||||
self.then(move |tok: &Token, _source: &[char]| preds_is.iter().any(|pred| pred(&tok.kind)))
|
||||
self.then_kind_where(move |k| preds_is.iter().any(|pred| pred(k)))
|
||||
}
|
||||
|
||||
/// Match a token where none of the token kind predicates returns true.
|
||||
/// Like `then_kind_neither` but for more than two predicates.
|
||||
pub fn then_kind_none_of<F>(self, preds_isnt: &'static [F]) -> Self
|
||||
where
|
||||
F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
|
||||
{
|
||||
self.then_kind_where(move |k| preds_isnt.iter().all(|pred| !pred(k)))
|
||||
}
|
||||
|
||||
/// Match a token where any of the token kind predicates returns true,
|
||||
|
|
@ -456,6 +531,7 @@ impl SequenceExpr {
|
|||
// Adverbs
|
||||
|
||||
gen_then_from_is!(adverb);
|
||||
gen_then_from_is!(frequency_adverb);
|
||||
|
||||
// Determiners
|
||||
|
||||
|
|
|
|||
121
harper-core/src/irregular_nouns.rs
Normal file
121
harper-core/src/irregular_nouns.rs
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
use lazy_static::lazy_static;
|
||||
use serde::Deserialize;
|
||||
use std::sync::Arc;
|
||||
|
||||
type Noun = (String, String);
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct IrregularNouns {
|
||||
nouns: Vec<Noun>,
|
||||
}
|
||||
|
||||
/// The uncached function that is used to produce the original copy of the
|
||||
/// irregular noun table.
|
||||
fn uncached_inner_new() -> Arc<IrregularNouns> {
|
||||
IrregularNouns::from_json_file(include_str!("../irregular_nouns.json"))
|
||||
.map(Arc::new)
|
||||
.unwrap_or_else(|e| panic!("Failed to load irregular noun table: {}", e))
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref NOUNS: Arc<IrregularNouns> = uncached_inner_new();
|
||||
}
|
||||
|
||||
impl IrregularNouns {
|
||||
pub fn new() -> Self {
|
||||
Self { nouns: vec![] }
|
||||
}
|
||||
|
||||
pub fn from_json_file(json: &str) -> Result<Self, serde_json::Error> {
|
||||
// Deserialize into Vec<serde_json::Value> to handle mixed types
|
||||
let values: Vec<serde_json::Value> =
|
||||
serde_json::from_str(json).expect("Failed to parse irregular nouns JSON");
|
||||
|
||||
let mut nouns = Vec::new();
|
||||
|
||||
for value in values {
|
||||
match value {
|
||||
serde_json::Value::Array(arr) if arr.len() == 2 => {
|
||||
// Handle array of 2 strings
|
||||
if let (Some(singular), Some(plural)) = (arr[0].as_str(), arr[1].as_str()) {
|
||||
nouns.push((singular.to_string(), plural.to_string()));
|
||||
}
|
||||
}
|
||||
// Strings are used for comments to guide contributors editing the file
|
||||
serde_json::Value::String(_) => {}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Self { nouns })
|
||||
}
|
||||
|
||||
pub fn curated() -> Arc<Self> {
|
||||
(*NOUNS).clone()
|
||||
}
|
||||
|
||||
pub fn get_plural_for_singular(&self, singular: &str) -> Option<&str> {
|
||||
self.nouns
|
||||
.iter()
|
||||
.find(|(sg, _)| sg.eq_ignore_ascii_case(singular))
|
||||
.map(|(_, pl)| pl.as_str())
|
||||
}
|
||||
|
||||
pub fn get_singular_for_plural(&self, plural: &str) -> Option<&str> {
|
||||
self.nouns
|
||||
.iter()
|
||||
.find(|(_, pl)| pl.eq_ignore_ascii_case(plural))
|
||||
.map(|(sg, _)| sg.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for IrregularNouns {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn can_find_irregular_plural_for_singular_lowercase() {
        let table = IrregularNouns::curated();
        assert_eq!(table.get_plural_for_singular("man"), Some("men"));
    }

    #[test]
    fn can_find_irregular_plural_for_singular_uppercase() {
        // Lookups are case-insensitive.
        let table = IrregularNouns::curated();
        assert_eq!(table.get_plural_for_singular("WOMAN"), Some("women"));
    }

    #[test]
    fn can_find_singular_for_irregular_plural() {
        let table = IrregularNouns::curated();
        assert_eq!(table.get_singular_for_plural("children"), Some("child"));
    }

    #[test]
    fn cant_find_regular_plural() {
        // Regular nouns are not in the table.
        let table = IrregularNouns::curated();
        assert_eq!(table.get_plural_for_singular("car"), None);
    }

    #[test]
    fn cant_find_non_noun() {
        let table = IrregularNouns::curated();
        assert_eq!(table.get_plural_for_singular("the"), None);
    }
}
|
||||
120
harper-core/src/irregular_verbs.rs
Normal file
120
harper-core/src/irregular_verbs.rs
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
use lazy_static::lazy_static;
|
||||
use serde::Deserialize;
|
||||
use std::sync::Arc;
|
||||
|
||||
type Verb = (String, String, String);
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct IrregularVerbs {
|
||||
verbs: Vec<Verb>,
|
||||
}
|
||||
|
||||
/// The uncached function that is used to produce the original copy of the
|
||||
/// irregular verb table.
|
||||
fn uncached_inner_new() -> Arc<IrregularVerbs> {
|
||||
IrregularVerbs::from_json_file(include_str!("../irregular_verbs.json"))
|
||||
.map(Arc::new)
|
||||
.unwrap_or_else(|e| panic!("Failed to load irregular verb table: {}", e))
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref VERBS: Arc<IrregularVerbs> = uncached_inner_new();
|
||||
}
|
||||
|
||||
impl IrregularVerbs {
|
||||
pub fn new() -> Self {
|
||||
Self { verbs: vec![] }
|
||||
}
|
||||
|
||||
pub fn from_json_file(json: &str) -> Result<Self, serde_json::Error> {
|
||||
// Deserialize into Vec<serde_json::Value> to handle mixed types
|
||||
let values: Vec<serde_json::Value> =
|
||||
serde_json::from_str(json).expect("Failed to parse irregular verbs JSON");
|
||||
|
||||
let mut verbs = Vec::new();
|
||||
|
||||
for value in values {
|
||||
match value {
|
||||
serde_json::Value::Array(arr) if arr.len() == 3 => {
|
||||
// Handle array of 3 strings
|
||||
if let (Some(lemma), Some(preterite), Some(past_participle)) =
|
||||
(arr[0].as_str(), arr[1].as_str(), arr[2].as_str())
|
||||
{
|
||||
verbs.push((
|
||||
lemma.to_string(),
|
||||
preterite.to_string(),
|
||||
past_participle.to_string(),
|
||||
));
|
||||
}
|
||||
}
|
||||
// Strings are used for comments to guide contributors editing the file
|
||||
serde_json::Value::String(_) => {}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Self { verbs })
|
||||
}
|
||||
|
||||
pub fn curated() -> Arc<Self> {
|
||||
(*VERBS).clone()
|
||||
}
|
||||
|
||||
pub fn get_past_participle_for_preterite(&self, preterite: &str) -> Option<&str> {
|
||||
self.verbs
|
||||
.iter()
|
||||
.find(|(_, pt, _)| pt.eq_ignore_ascii_case(preterite))
|
||||
.map(|(_, _, pp)| pp.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for IrregularVerbs {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn can_find_irregular_past_participle_for_preterite_lowercase() {
        let table = IrregularVerbs::curated();
        assert_eq!(
            table.get_past_participle_for_preterite("arose"),
            Some("arisen")
        );
    }

    #[test]
    fn can_find_irregular_past_participle_for_preterite_uppercase() {
        // Lookups are case-insensitive.
        let table = IrregularVerbs::curated();
        assert_eq!(
            table.get_past_participle_for_preterite("WENT"),
            Some("gone")
        );
    }

    #[test]
    fn can_find_irregular_past_participle_same_as_past_tense() {
        // Some irregular verbs share preterite and past participle.
        let table = IrregularVerbs::curated();
        assert_eq!(
            table.get_past_participle_for_preterite("taught"),
            Some("taught")
        );
    }

    #[test]
    fn cant_find_regular_past_participle() {
        // Regular verbs are not in the table.
        let table = IrregularVerbs::curated();
        assert_eq!(table.get_past_participle_for_preterite("walked"), None);
    }

    #[test]
    fn cant_find_non_verb() {
        let table = IrregularVerbs::curated();
        assert_eq!(table.get_past_participle_for_preterite("the"), None);
    }
}
|
||||
|
|
@ -11,6 +11,8 @@ mod edit_distance;
|
|||
pub mod expr;
|
||||
mod fat_token;
|
||||
mod ignored_lints;
|
||||
mod irregular_nouns;
|
||||
mod irregular_verbs;
|
||||
pub mod language_detection;
|
||||
mod lexing;
|
||||
pub mod linting;
|
||||
|
|
@ -42,6 +44,8 @@ pub use dict_word_metadata_orthography::{OrthFlags, Orthography};
|
|||
pub use document::Document;
|
||||
pub use fat_token::{FatStringToken, FatToken};
|
||||
pub use ignored_lints::{IgnoredLints, LintContext};
|
||||
pub use irregular_nouns::IrregularNouns;
|
||||
pub use irregular_verbs::IrregularVerbs;
|
||||
use linting::Lint;
|
||||
pub use mask::{Mask, Masker};
|
||||
pub use number::{Number, OrdinalSuffix};
|
||||
|
|
|
|||
|
|
@ -12,11 +12,13 @@ pub struct AdjectiveDoubleDegree {
|
|||
impl Default for AdjectiveDoubleDegree {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
expr: Box::new(SequenceExpr::word_set(&["more", "most"]).t_ws().then(
|
||||
|tok: &Token, _src: &[char]| {
|
||||
tok.kind.is_comparative_adjective() || tok.kind.is_superlative_adjective()
|
||||
},
|
||||
)),
|
||||
expr: Box::new(
|
||||
SequenceExpr::word_set(&["more", "most"])
|
||||
.t_ws()
|
||||
.then_kind_where(|kind| {
|
||||
kind.is_comparative_adjective() || kind.is_superlative_adjective()
|
||||
}),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
144
harper-core/src/linting/apart_from.rs
Normal file
144
harper-core/src/linting/apart_from.rs
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
use crate::Token;
|
||||
use crate::expr::{Expr, SequenceExpr};
|
||||
use crate::linting::expr_linter::Chunk;
|
||||
|
||||
use super::{ExprLinter, Lint, LintKind, Suggestion};
|
||||
|
||||
/// Linter that flags the transposition typo `apart form` and suggests the
/// intended phrase `apart from`.
pub struct ApartFrom {
    // Compiled token pattern: `apart` + whitespace + `form`, any capitalization.
    expr: Box<dyn Expr>,
}
|
||||
|
||||
impl Default for ApartFrom {
|
||||
fn default() -> Self {
|
||||
let expr = SequenceExpr::any_capitalization_of("apart")
|
||||
.t_ws()
|
||||
.then_any_capitalization_of("form");
|
||||
|
||||
Self {
|
||||
expr: Box::new(expr),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprLinter for ApartFrom {
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
|
||||
let span = matched_tokens.last()?.span;
|
||||
|
||||
Some(Lint {
|
||||
span,
|
||||
lint_kind: LintKind::WordChoice,
|
||||
suggestions: vec![Suggestion::replace_with_match_case_str(
|
||||
"from",
|
||||
span.get_content(source),
|
||||
)],
|
||||
message: "Use `from` to spell `apart from`.".to_owned(),
|
||||
priority: 50,
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Flags the misspelling `apart form` and suggests `apart from`."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::ApartFrom;
    use crate::linting::tests::{assert_lint_count, assert_suggestion_result};

    #[test]
    fn corrects_basic_typo() {
        assert_suggestion_result(
            "Christianity was set apart form other religions.",
            ApartFrom::default(),
            "Christianity was set apart from other religions.",
        );
    }

    #[test]
    fn corrects_title_case() {
        // Suggestion preserves the original capitalization of `Form`.
        assert_suggestion_result(
            "Apart Form these files, everything uploaded fine.",
            ApartFrom::default(),
            "Apart From these files, everything uploaded fine.",
        );
    }

    #[test]
    fn corrects_all_caps() {
        assert_suggestion_result(
            "APART FORM THE REST OF THE FIELD.",
            ApartFrom::default(),
            "APART FROM THE REST OF THE FIELD.",
        );
    }

    #[test]
    fn corrects_with_comma() {
        // Punctuation after `form` does not block the match.
        assert_suggestion_result(
            "It was apart form, not apart from, the original plan.",
            ApartFrom::default(),
            "It was apart from, not apart from, the original plan.",
        );
    }

    #[test]
    fn corrects_with_newline() {
        // A newline counts as the whitespace between `apart` and `form`.
        assert_suggestion_result(
            "They stood apart\nform everyone else at the rally.",
            ApartFrom::default(),
            "They stood apart\nfrom everyone else at the rally.",
        );
    }

    #[test]
    fn corrects_extra_spacing() {
        assert_suggestion_result(
            "We keep the archive apart form public assets.",
            ApartFrom::default(),
            "We keep the archive apart from public assets.",
        );
    }

    #[test]
    fn allows_correct_phrase() {
        // The correctly spelled phrase must not be flagged.
        assert_lint_count(
            "Lebanon's freedoms set it apart from other Arab states.",
            ApartFrom::default(),
            0,
        );
    }

    #[test]
    fn ignores_hyphenated() {
        // A hyphen is not whitespace, so `apart-form` is not matched.
        assert_lint_count(
            "Their apart-form design wasn’t what we needed.",
            ApartFrom::default(),
            0,
        );
    }

    #[test]
    fn ignores_split_by_comma() {
        // A comma between the words breaks the sequence.
        assert_lint_count(
            "They stood apart, form lines when asked.",
            ApartFrom::default(),
            0,
        );
    }

    #[test]
    fn ignores_unrelated_form_usage() {
        // `form` used as a noun, not adjacent to `apart`.
        assert_lint_count(
            "The form was kept apart to dry after printing.",
            ApartFrom::default(),
            0,
        );
    }
}
|
||||
202
harper-core/src/linting/brand_brandish.rs
Normal file
202
harper-core/src/linting/brand_brandish.rs
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
use crate::{
|
||||
Lint, Token, TokenKind,
|
||||
expr::{Expr, SequenceExpr},
|
||||
linting::{ExprLinter, LintKind, Suggestion, expr_linter::Chunk},
|
||||
};
|
||||
|
||||
/// Linter that flags inflections of `brandish` (to wield a weapon) used
/// where `brand` (to label someone) was intended, e.g. `brandish him a traitor`.
pub struct BrandBrandish {
    // Compiled token pattern: an inflection of `brandish` followed by an object pronoun.
    expr: Box<dyn Expr>,
}
|
||||
|
||||
impl Default for BrandBrandish {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
expr: Box::new(
|
||||
SequenceExpr::word_set(&["brandish", "brandished", "brandishes", "brandishing"])
|
||||
.t_ws()
|
||||
// "her" is also a possessive determiner as in "she brandished her sword"
|
||||
// "it" and "them" can refer to objects as in "draw your sword(s) and brandish it/them"
|
||||
.then_kind_except(TokenKind::is_object_pronoun, &["her", "it", "them"]),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprLinter for BrandBrandish {
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, toks: &[Token], src: &[char]) -> Option<Lint> {
|
||||
let verb_span = toks.first()?.span;
|
||||
let verb_chars = verb_span.get_content(src);
|
||||
|
||||
enum Form {
|
||||
Base,
|
||||
Past,
|
||||
ThirdPerson,
|
||||
Ing,
|
||||
}
|
||||
|
||||
let infl = match verb_chars.last().map(|c| c.to_ascii_lowercase()) {
|
||||
Some('h') => Form::Base,
|
||||
Some('d') => Form::Past,
|
||||
Some('s') => Form::ThirdPerson,
|
||||
Some('g') => Form::Ing,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
Some(Lint {
|
||||
span: verb_span,
|
||||
lint_kind: LintKind::Malapropism,
|
||||
suggestions: vec![Suggestion::replace_with_match_case_str(
|
||||
match infl {
|
||||
Form::Base => "brand",
|
||||
Form::Past => "branded",
|
||||
Form::ThirdPerson => "brands",
|
||||
Form::Ing => "branding",
|
||||
},
|
||||
verb_chars,
|
||||
)],
|
||||
message: "`Brandish` means to wield a weapon. You probably mean `brand`.".to_string(),
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Looks for `brandish` wrongly used when `brand` is intended."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::linting::{brand_brandish::BrandBrandish, tests::assert_suggestion_result};
|
||||
|
||||
#[test]
|
||||
fn correct_brandish_a_traitor() {
|
||||
assert_suggestion_result(
|
||||
"Unretire Gretzky's sweater . Brandish him a traitor.",
|
||||
BrandBrandish::default(),
|
||||
"Unretire Gretzky's sweater . Brand him a traitor.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_brandish_a_criminal() {
|
||||
assert_suggestion_result(
|
||||
"lied to stop kuma's ideology from taking root and to brandish him a criminal that they could arrest",
|
||||
BrandBrandish::default(),
|
||||
"lied to stop kuma's ideology from taking root and to brand him a criminal that they could arrest",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_brandish_as_a() {
|
||||
assert_suggestion_result(
|
||||
"he was so afraid his thoughts could brandish him as a paedophile",
|
||||
BrandBrandish::default(),
|
||||
"he was so afraid his thoughts could brand him as a paedophile",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_brandish_an_offender() {
|
||||
assert_suggestion_result(
|
||||
"Chanel Oberlin's reason for purposely leading on Pete Martinez in order to humiliate him and brandish him a registered sex offender",
|
||||
BrandBrandish::default(),
|
||||
"Chanel Oberlin's reason for purposely leading on Pete Martinez in order to humiliate him and brand him a registered sex offender",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_brandish_with_nicknames() {
|
||||
assert_suggestion_result(
|
||||
"?? spoke out over the move by Kenyans to continuously brandish him with nicknames even after ...",
|
||||
BrandBrandish::default(),
|
||||
"?? spoke out over the move by Kenyans to continuously brand him with nicknames even after ...",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_brandish_as_a_aymbol() {
|
||||
assert_suggestion_result(
|
||||
"brandish him as an acclaimed symbol of humility, integrity and incorruptibility in the face of today's corrupt economic and political elite1",
|
||||
BrandBrandish::default(),
|
||||
"brand him as an acclaimed symbol of humility, integrity and incorruptibility in the face of today's corrupt economic and political elite1",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_brandish_as_illegal() {
|
||||
assert_suggestion_result(
|
||||
"To attempt to brandish him as an “illegal immigrant” is absolutely ridiculous and warrants an immediate retraction and apology.",
|
||||
BrandBrandish::default(),
|
||||
"To attempt to brand him as an “illegal immigrant” is absolutely ridiculous and warrants an immediate retraction and apology.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_brandish_with_nickname() {
|
||||
assert_suggestion_result(
|
||||
"The small minded townsfolk brandish him with the nickname \"Genepool\" due to his physical and cognitive shortcomings.",
|
||||
BrandBrandish::default(),
|
||||
"The small minded townsfolk brand him with the nickname \"Genepool\" due to his physical and cognitive shortcomings.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_brandish_with_label() {
|
||||
assert_suggestion_result(
|
||||
"One such reason that critics brandish him with this label is due to Peterson's opposition to Canada's Bill C-16",
|
||||
BrandBrandish::default(),
|
||||
"One such reason that critics brand him with this label is due to Peterson's opposition to Canada's Bill C-16",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn correct_brandished_us() {
    // Past tense: "brandished us" -> "branded us".
    let source = "The mark they brandished us with will fade to dust when we finally meet our end.";
    let expected = "The mark they branded us with will fade to dust when we finally meet our end.";
    assert_suggestion_result(source, BrandBrandish::default(), expected);
}
|
||||
|
||||
#[test]
fn correct_brandishing_him() {
    // Progressive form: "brandishing him" -> "branding him".
    let source = "he said some words trying to hit back at the center for brandishing him as a Pakistani at an NRC rally";
    let expected = "he said some words trying to hit back at the center for branding him as a Pakistani at an NRC rally";
    assert_suggestion_result(source, BrandBrandish::default(), expected);
}
|
||||
|
||||
#[test]
fn correct_brandish_us() {
    // Plural object: "brandish us" -> "brand us".
    let source = "Our resolute determination for the ultimate quality and all-inclusive directory of food commodities brandish us as a flawless associate in B2B";
    let expected = "Our resolute determination for the ultimate quality and all-inclusive directory of food commodities brand us as a flawless associate in B2B";
    assert_suggestion_result(source, BrandBrandish::default(), expected);
}
|
||||
|
||||
#[test]
fn correct_brandished_him() {
    // Past tense with singular object: "brandished him" -> "branded him".
    let source = "Frank discovers Myra brandished him with the letter 'R', for rapist.";
    let expected = "Frank discovers Myra branded him with the letter 'R', for rapist.";
    assert_suggestion_result(source, BrandBrandish::default(), expected);
}
|
||||
|
||||
#[test]
fn correct_brandishes_him() {
    // Third-person singular: "brandishes him" -> "brands him".
    let source = "Whether one turns a blind eye to Tim's wrongs or brandishes him a traitor will plant audiences in their own personal line in the sand.";
    let expected = "Whether one turns a blind eye to Tim's wrongs or brands him a traitor will plant audiences in their own personal line in the sand.";
    assert_suggestion_result(source, BrandBrandish::default(), expected);
}
|
||||
}
|
||||
|
|
@ -1,7 +1,9 @@
|
|||
use crate::expr::All;
|
||||
use crate::expr::Expr;
|
||||
use crate::expr::MergeableWords;
|
||||
use crate::expr::OwnedExprExt;
|
||||
use crate::expr::SequenceExpr;
|
||||
use crate::patterns::InflectionOfBe;
|
||||
use crate::{CharStringExt, TokenStringExt, linting::ExprLinter};
|
||||
|
||||
use super::{Lint, LintKind, Suggestion, is_content_word, predicate};
|
||||
|
|
@ -31,7 +33,7 @@ impl Default for CompoundNounAfterDetAdj {
|
|||
.t_ws()
|
||||
.then(is_content_word)
|
||||
.t_ws()
|
||||
.then(is_content_word);
|
||||
.then(is_content_word.and_not(InflectionOfBe::default()));
|
||||
|
||||
let split_expr = Lrc::new(MergeableWords::new(|meta_closed, meta_open| {
|
||||
predicate(meta_closed, meta_open)
|
||||
|
|
@ -39,12 +41,7 @@ impl Default for CompoundNounAfterDetAdj {
|
|||
|
||||
let mut expr = All::default();
|
||||
expr.add(context_expr);
|
||||
expr.add(
|
||||
SequenceExpr::default()
|
||||
.t_any()
|
||||
.t_any()
|
||||
.then(split_expr.clone()),
|
||||
);
|
||||
expr.add(SequenceExpr::anything().t_any().then(split_expr.clone()));
|
||||
|
||||
Self {
|
||||
expr: Box::new(expr),
|
||||
|
|
|
|||
147
harper-core/src/linting/cure_for.rs
Normal file
147
harper-core/src/linting/cure_for.rs
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
use crate::{
|
||||
Span, Token,
|
||||
expr::{Expr, SequenceExpr},
|
||||
linting::expr_linter::Chunk,
|
||||
linting::{ExprLinter, Lint, LintKind, Suggestion},
|
||||
patterns::{DerivedFrom, Word},
|
||||
};
|
||||
|
||||
/// Linter that flags the non-standard pairing `cure against` and suggests the
/// conventional `cure for` instead.
pub struct CureFor {
    // Compiled token pattern: a word derived from `cure`, whitespace, then `against`.
    expr: Box<dyn Expr>,
}
|
||||
|
||||
impl Default for CureFor {
|
||||
fn default() -> Self {
|
||||
let expr = SequenceExpr::default()
|
||||
.then(DerivedFrom::new_from_str("cure"))
|
||||
.t_ws()
|
||||
.then(Word::new("against"));
|
||||
|
||||
Self {
|
||||
expr: Box::new(expr),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprLinter for CureFor {
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
|
||||
let against = matched_tokens.last()?;
|
||||
|
||||
let template: Vec<char> = against.span.get_content(source).to_vec();
|
||||
let suggestion = Suggestion::replace_with_match_case_str("for", &template);
|
||||
|
||||
Some(Lint {
|
||||
span: Span::new(against.span.start, against.span.end),
|
||||
lint_kind: LintKind::Usage,
|
||||
suggestions: vec![suggestion],
|
||||
message: "Prefer `cure for` when describing a treatment target.".to_owned(),
|
||||
priority: 31,
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Flags `cure against` and prefers the standard `cure for` pairing."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::CureFor;
    use crate::linting::tests::{assert_lint_count, assert_suggestion_result};

    // Positive corpus: `cure against` in several inflections and positions.

    #[test]
    fn corrects_simple_cure_against() {
        assert_suggestion_result(
            "Researchers sought a cure against the stubborn illness.",
            CureFor::default(),
            "Researchers sought a cure for the stubborn illness.",
        );
    }

    #[test]
    fn corrects_plural_cures_against() {
        assert_suggestion_result(
            "Doctors insist this serum cures against the new variant.",
            CureFor::default(),
            "Doctors insist this serum cures for the new variant.",
        );
    }

    #[test]
    fn corrects_past_participle_cured_against() {
        assert_suggestion_result(
            "The remedy was cured against the infection last spring.",
            CureFor::default(),
            "The remedy was cured for the infection last spring.",
        );
    }

    // The replacement mirrors the source casing (AGAINST -> FOR).
    #[test]
    fn corrects_uppercase_against() {
        assert_suggestion_result(
            "We still trust the cure AGAINST the dreaded plague.",
            CureFor::default(),
            "We still trust the cure FOR the dreaded plague.",
        );
    }

    #[test]
    fn corrects_at_sentence_start() {
        assert_suggestion_result(
            "Cure against that condition became the rallying cry.",
            CureFor::default(),
            "Cure for that condition became the rallying cry.",
        );
    }

    // Negative corpus: correct usage and near-misses must not be flagged.

    #[test]
    fn does_not_flag_cure_for() {
        assert_lint_count(
            "They finally found a cure for the fever.",
            CureFor::default(),
            0,
        );
    }

    #[test]
    fn does_not_flag_cure_from() {
        assert_lint_count(
            "A cure from this rare herb is on the horizon.",
            CureFor::default(),
            0,
        );
    }

    // A comma between `cure` and `against` breaks the whitespace-only pattern.
    #[test]
    fn does_not_flag_with_comma() {
        assert_lint_count(
            "A cure, against all odds, appeared in the files.",
            CureFor::default(),
            0,
        );
    }

    #[test]
    fn does_not_flag_unrelated_against() {
        assert_lint_count(
            "Travelers stand against the roaring wind on the cliffs.",
            CureFor::default(),
            0,
        );
    }

    // `secure` must not be mistaken for a `cure` derivative.
    #[test]
    fn does_not_flag_secure_against() {
        assert_lint_count(
            "The fortress stayed secure against the invaders.",
            CureFor::default(),
            0,
        );
    }
}
|
||||
|
|
@ -110,7 +110,7 @@ mod tests {
|
|||
#[test]
|
||||
fn multiple_dollar() {
|
||||
assert_suggestion_result(
|
||||
"They were either 25$ 24$ or 23$.",
|
||||
"They were either 25\\$ 24\\$ or 23\\$.",
|
||||
CurrencyPlacement::default(),
|
||||
"They were either $25 $24 or $23.",
|
||||
);
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ pub struct DeterminerWithoutNoun {
|
|||
impl Default for DeterminerWithoutNoun {
|
||||
fn default() -> Self {
|
||||
let expr = SequenceExpr::default()
|
||||
.then(|tok: &Token, _: &[char]| tok.kind.is_determiner())
|
||||
.then_kind_where(|kind| kind.is_determiner())
|
||||
.t_ws()
|
||||
.then_conjunction();
|
||||
|
||||
|
|
|
|||
206
harper-core/src/linting/disjoint_prefixes.rs
Normal file
206
harper-core/src/linting/disjoint_prefixes.rs
Normal file
|
|
@ -0,0 +1,206 @@
|
|||
use crate::{
|
||||
Lint, Token, TokenKind, TokenStringExt,
|
||||
expr::{Expr, OwnedExprExt, SequenceExpr},
|
||||
linting::{ExprLinter, LintKind, Suggestion, expr_linter::Chunk},
|
||||
spell::Dictionary,
|
||||
};
|
||||
|
||||
/// Linter that finds common prefixes written apart from their stem (with a
/// space or hyphen) and suggests the joined or hyphenated dictionary form.
pub struct DisjointPrefixes<D> {
    // Compiled token pattern for `<prefix> <word>` / `<prefix>-<word>` sequences.
    expr: Box<dyn Expr>,
    // Dictionary used to validate the original, joined, and hyphenated spellings.
    dict: D,
}

// Known false positives not to join to these prefixes:
const OUT_EXCEPTIONS: &[&str] = &["boxes", "facing", "live", "numbers", "playing"];
const OVER_EXCEPTIONS: &[&str] = &["all", "joy", "long", "night", "reading", "steps", "time"];
const UNDER_EXCEPTIONS: &[&str] = &["development", "mine"];
const UP_EXCEPTIONS: &[&str] = &["loading", "right", "state", "time", "trend"];
|
||||
|
||||
impl<D> DisjointPrefixes<D>
where
    D: Dictionary,
{
    /// Builds the linter over `dict`, matching a known prefix followed by a
    /// verb or noun, while excluding the exception lists above.
    pub fn new(dict: D) -> Self {
        Self {
            expr: Box::new(
                SequenceExpr::word_set(&[
                    // These prefixes rarely cause false positives
                    "anti", "auto", "bi", "counter", "de", "dis", "extra", "fore", "hyper", "il",
                    "im", "inter", "ir", "macro", "mal", "micro", "mid", "mini", "mis", "mono",
                    "multi", "non", "omni", "post", "pre", "pro", "re", "semi", "sub", "super",
                    "trans", "tri", "ultra", "un", "uni",
                    // "co" has one very common false positive: co-op != coop
                    "co",
                    // These prefixes are all also words in their own right, which leads to more false positives.
                    "out", "over", "under",
                    "up",
                    // These prefixes are commented out due to too many false positives
                    // or incorrect transformations:
                    // "a": a live -> alive
                    // "in": in C -> inc; in action -> inaction
                ])
                .t_ws_h()
                .then_kind_either(TokenKind::is_verb, TokenKind::is_noun)
                .then_optional_hyphen()
                .and_not(SequenceExpr::any_of(vec![
                    // No trailing hyphen. Ex: Custom patterns take precedence over built-in patterns -> overbuilt
                    Box::new(SequenceExpr::anything().t_any().t_any().then_hyphen()),
                    // Don't merge "co op" whether separated by space or hyphen.
                    Box::new(SequenceExpr::aco("co").t_any().t_set(&["op", "ops"])),
                    // Merge these if they're separated by hyphen, but not space.
                    Box::new(SequenceExpr::aco("out").t_ws().t_set(OUT_EXCEPTIONS)),
                    Box::new(SequenceExpr::aco("over").t_ws().t_set(OVER_EXCEPTIONS)),
                    Box::new(SequenceExpr::aco("under").t_ws().t_set(UNDER_EXCEPTIONS)),
                    Box::new(SequenceExpr::aco("up").t_ws().t_set(UP_EXCEPTIONS)),
                ])),
            ),
            dict,
        }
    }
}
|
||||
|
||||
impl<D> ExprLinter for DisjointPrefixes<D>
|
||||
where
|
||||
D: Dictionary,
|
||||
{
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint_with_context(
|
||||
&self,
|
||||
toks: &[Token],
|
||||
src: &[char],
|
||||
ctx: Option<(&[Token], &[Token])>,
|
||||
) -> Option<Lint> {
|
||||
let toks_span = toks.span()?;
|
||||
let (pre, _) = ctx?;
|
||||
|
||||
// Cloud Native Pub-Sub System at Pinterest -> subsystem
|
||||
if pre.last().is_some_and(|p| p.kind.is_hyphen()) {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Avoid including text from unlintable sections between tokens
|
||||
// that could result from naively using toks.span()?.get_content_string(src)
|
||||
let original = format!(
|
||||
"{}{}{}",
|
||||
toks[0].span.get_content_string(src),
|
||||
if toks[1].kind.is_hyphen() { '-' } else { ' ' },
|
||||
toks[2].span.get_content_string(src)
|
||||
);
|
||||
|
||||
// If the original form is in the dictionary, return None
|
||||
if self.dict.contains_word_str(&original) {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut hyphenated = None;
|
||||
if !toks[1].kind.is_hyphen() {
|
||||
hyphenated = Some(format!(
|
||||
"{}-{}",
|
||||
toks[0].span.get_content_string(src),
|
||||
toks[2].span.get_content_string(src)
|
||||
));
|
||||
}
|
||||
let joined = Some(format!(
|
||||
"{}{}",
|
||||
toks[0].span.get_content_string(src),
|
||||
toks[2].span.get_content_string(src)
|
||||
));
|
||||
|
||||
// Check if either joined or hyphenated form is in the dictionary
|
||||
let joined_valid = joined
|
||||
.as_ref()
|
||||
.is_some_and(|j| self.dict.contains_word_str(j));
|
||||
let hyphenated_valid = hyphenated
|
||||
.as_ref()
|
||||
.is_some_and(|h| self.dict.contains_word_str(h));
|
||||
|
||||
if !joined_valid && !hyphenated_valid {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Joining with a hyphen when original is separated by space is more likely correct
|
||||
// if hyphenated form is in the dictionary. So add first if verified.
|
||||
// Joining when separated by a space is more common but also has more false positives, so add them second.
|
||||
let suggestions = [(&hyphenated, hyphenated_valid), (&joined, joined_valid)]
|
||||
.into_iter()
|
||||
.filter_map(|(word, is_valid)| word.as_ref().filter(|_| is_valid))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let suggestions = suggestions
|
||||
.iter()
|
||||
.map(|s| {
|
||||
Suggestion::replace_with_match_case(s.chars().collect(), toks_span.get_content(src))
|
||||
})
|
||||
.collect();
|
||||
|
||||
Some(Lint {
|
||||
span: toks_span,
|
||||
lint_kind: LintKind::Spelling,
|
||||
suggestions,
|
||||
message: "This looks like a prefix that can be joined with the rest of the word."
|
||||
.to_string(),
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Looks for words with their prefixes written with a space or hyphen between instead of joined."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::DisjointPrefixes;
    use crate::{
        linting::tests::{assert_no_lints, assert_suggestion_result},
        spell::FstDictionary,
    };

    // Hyphenated form exists but the joined form is the dictionary word.
    #[test]
    fn fix_hyphenated_to_joined() {
        assert_suggestion_result(
            "Download pre-built binaries or build from source.",
            DisjointPrefixes::new(FstDictionary::curated()),
            "Download prebuilt binaries or build from source.",
        );
    }

    // Space-separated form joined directly.
    #[test]
    fn fix_open_to_joined() {
        assert_suggestion_result(
            "Advanced Nginx configuration available for super users",
            DisjointPrefixes::new(FstDictionary::curated()),
            "Advanced Nginx configuration available for superusers",
        );
    }

    // "co op" is an explicit exception, whatever the separator.
    #[test]
    fn dont_join_open_co_op() {
        assert_no_lints(
            "They are cheaper at the co op.",
            DisjointPrefixes::new(FstDictionary::curated()),
        );
    }

    #[test]
    fn dont_join_hyphenated_co_op() {
        assert_no_lints(
            "Almost everything is cheaper at the co-op.",
            DisjointPrefixes::new(FstDictionary::curated()),
        );
    }

    // Space-separated form whose dictionary spelling is hyphenated.
    #[test]
    fn fix_open_to_hyphenated() {
        assert_suggestion_result(
            "My hobby is de extinction of the dinosaurs.",
            DisjointPrefixes::new(FstDictionary::curated()),
            "My hobby is de-extinction of the dinosaurs.",
        );
    }
}
|
||||
230
harper-core/src/linting/ever_every.rs
Normal file
230
harper-core/src/linting/ever_every.rs
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
use crate::{
|
||||
Lint, Token,
|
||||
expr::{Expr, OwnedExprExt, SequenceExpr},
|
||||
linting::{ExprLinter, LintKind, Suggestion, expr_linter::Chunk},
|
||||
patterns::{ModalVerb, WordSet},
|
||||
};
|
||||
|
||||
/// Linter that catches `every` typed where `ever` was meant
/// (e.g. "did you every" -> "did you ever").
pub struct EverEvery {
    // Compiled pattern: auxiliary/modal verb, subject pronoun, then `every`.
    expr: Box<dyn Expr>,
}
|
||||
|
||||
impl Default for EverEvery {
    /// Matches `<aux-or-modal> <subject pronoun> every`, excluding sequences
    /// ending in `it` (e.g. "does it every ..." can be legitimate).
    fn default() -> Self {
        Self {
            expr: Box::new(
                SequenceExpr::any_of(vec![
                    // Auxiliary verbs, with and without apostrophes in contractions.
                    Box::new(WordSet::new(&[
                        "are", "aren't", "arent", "did", "didn't", "didnt", "do", "does",
                        "doesn't", "doesnt", "dont", "don't", "had", "hadn't", "hadnt", "has",
                        "hasn't", "hasnt", "have", "haven't", "havent", "is", "isn't", "isnt",
                        "was", "wasn't", "wasnt", "were", "weren't", "werent",
                    ])),
                    // Modals (can/could/should/would ...), including common misspellings.
                    Box::new(ModalVerb::with_common_errors()),
                ])
                .t_ws()
                .then_subject_pronoun()
                .t_ws()
                .t_aco("every")
                .and_not(SequenceExpr::anything().t_any().t_aco("it")),
            ),
        }
    }
}
|
||||
|
||||
impl ExprLinter for EverEvery {
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, toks: &[Token], src: &[char]) -> Option<Lint> {
|
||||
let span = toks[4].span;
|
||||
let content = span.get_content(src);
|
||||
Some(Lint {
|
||||
span,
|
||||
lint_kind: LintKind::Typo,
|
||||
suggestions: vec![Suggestion::replace_with_match_case(
|
||||
content[..content.len() - 1].to_vec(),
|
||||
content,
|
||||
)],
|
||||
message: "Is this `every` a typo that should be `ever`?".to_string(),
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Tries to correct typos of `every` instead of `ever`."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::EverEvery;
    use crate::linting::tests::{assert_no_lints, assert_suggestion_result};

    // Positive corpus: `every` after `<aux/modal> <pronoun>` is corrected to `ever`.

    #[test]
    fn fix_can_i_every() {
        assert_suggestion_result(
            "Odd, how can i every become negative in that case?",
            EverEvery::default(),
            "Odd, how can i ever become negative in that case?",
        );
    }

    #[test]
    fn fix_can_they_every() {
        assert_suggestion_result(
            "if each component has its own instance of NameService, how can they every share state?",
            EverEvery::default(),
            "if each component has its own instance of NameService, how can they ever share state?",
        )
    }

    #[test]
    fn fix_can_we_every() {
        assert_suggestion_result(
            "can we every have a good dev UX?",
            EverEvery::default(),
            "can we ever have a good dev UX?",
        );
    }

    #[test]
    fn fix_did_we_every() {
        assert_suggestion_result(
            "Did we every fix that?",
            EverEvery::default(),
            "Did we ever fix that?",
        )
    }

    #[test]
    fn fix_did_you_every() {
        assert_suggestion_result(
            "Did you every get vtsls working properly?",
            EverEvery::default(),
            "Did you ever get vtsls working properly?",
        )
    }

    #[test]
    fn fix_do_i_every() {
        assert_suggestion_result(
            "Rarely do I every look forward to the new ui.",
            EverEvery::default(),
            "Rarely do I ever look forward to the new ui.",
        )
    }

    #[test]
    fn fix_do_we_every() {
        assert_suggestion_result(
            "do we every stop learning new things?",
            EverEvery::default(),
            "do we ever stop learning new things?",
        )
    }

    #[test]
    fn fix_do_you_every() {
        assert_suggestion_result(
            "Do you every faced the issue or have any idea why this could happen?",
            EverEvery::default(),
            "Do you ever faced the issue or have any idea why this could happen?",
        )
    }

    // Casing is preserved (EVERY -> EVER).
    #[test]
    fn fix_dont_i_every() {
        assert_suggestion_result(
            "WHY DONT I EVERY SEE OR HEAR ABOUT THINGS HAPPENING IN SOUTHPORT?",
            EverEvery::default(),
            "WHY DONT I EVER SEE OR HEAR ABOUT THINGS HAPPENING IN SOUTHPORT?",
        )
    }

    #[test]
    fn fix_dont_they_every() {
        assert_suggestion_result(
            "And why dont they every smile first?",
            EverEvery::default(),
            "And why dont they ever smile first?",
        )
    }

    #[test]
    fn fix_dont_you_every() {
        assert_suggestion_result(
            "Dont you every forget this and believe nothing else.",
            EverEvery::default(),
            "Dont you ever forget this and believe nothing else.",
        )
    }

    #[test]
    fn fix_have_you_every() {
        assert_suggestion_result(
            "Have you every wanted to generate geometric structures from data.frames",
            EverEvery::default(),
            "Have you ever wanted to generate geometric structures from data.frames",
        )
    }

    // NOTE(review): this case actually exercises "would I", not "should I";
    // consider renaming to avoid confusion with `fix_would_i_every` below.
    #[test]
    fn fix_should_i_every() {
        assert_suggestion_result(
            "I.e. why would I every use deepcopy ?",
            EverEvery::default(),
            "I.e. why would I ever use deepcopy ?",
        )
    }

    #[test]
    fn fix_should_we_every() {
        assert_suggestion_result(
            "Should we every meet, I'll get you a beverage of your choosing!",
            EverEvery::default(),
            "Should we ever meet, I'll get you a beverage of your choosing!",
        )
    }

    #[test]
    fn fix_should_you_every() {
        assert_suggestion_result(
            "but you will always have a place in his home should you every truly desire it",
            EverEvery::default(),
            "but you will always have a place in his home should you ever truly desire it",
        )
    }

    #[test]
    fn fix_would_i_every() {
        assert_suggestion_result(
            "Why would I every do that?",
            EverEvery::default(),
            "Why would I ever do that?",
        )
    }

    #[test]
    fn fix_would_they_every() {
        assert_suggestion_result(
            "Would they every be installed together?",
            EverEvery::default(),
            "Would they ever be installed together?",
        )
    }

    // known false positive - future contributors: please feel free to tackle this!

    #[test]
    #[ignore = "unusual but not wrong position of time phrase, maybe should have commas?"]
    fn dont_flag_should_we_every() {
        assert_no_lints(
            "MM: should we every month or two have a roundup of what's been happening in WGSL",
            EverEvery::default(),
        )
    }
}
|
||||
|
|
@ -15,22 +15,17 @@ impl Default for Everyday {
|
|||
let everyday = Word::new("everyday");
|
||||
let every_day = Lrc::new(SequenceExpr::aco("every").t_ws().t_aco("day"));
|
||||
|
||||
let everyday_bad_after =
|
||||
All::new(vec![
|
||||
Box::new(
|
||||
SequenceExpr::default()
|
||||
.then(everyday.clone())
|
||||
.t_ws()
|
||||
.then_any_word(),
|
||||
),
|
||||
Box::new(SequenceExpr::default().t_any().t_any().then(
|
||||
|tok: &Token, _src: &[char]| {
|
||||
!tok.kind.is_noun()
|
||||
&& !tok.kind.is_oov()
|
||||
&& !tok.kind.is_verb_progressive_form()
|
||||
},
|
||||
)),
|
||||
]);
|
||||
let everyday_bad_after = All::new(vec![
|
||||
Box::new(
|
||||
SequenceExpr::default()
|
||||
.then(everyday.clone())
|
||||
.t_ws()
|
||||
.then_any_word(),
|
||||
),
|
||||
Box::new(SequenceExpr::anything().t_any().then_kind_where(|kind| {
|
||||
!kind.is_noun() && !kind.is_oov() && !kind.is_verb_progressive_form()
|
||||
})),
|
||||
]);
|
||||
|
||||
let bad_before_every_day = All::new(vec![
|
||||
Box::new(
|
||||
|
|
@ -56,8 +51,7 @@ impl Default for Everyday {
|
|||
.then_any_word(),
|
||||
),
|
||||
Box::new(
|
||||
SequenceExpr::default()
|
||||
.t_any()
|
||||
SequenceExpr::anything()
|
||||
.t_any()
|
||||
.then_kind_both(TokenKind::is_noun, TokenKind::is_verb)
|
||||
.t_any()
|
||||
|
|
@ -72,18 +66,14 @@ impl Default for Everyday {
|
|||
.then(everyday.clone())
|
||||
.then_punctuation(),
|
||||
),
|
||||
Box::new(
|
||||
SequenceExpr::default()
|
||||
.t_any()
|
||||
.then(|tok: &Token, _src: &[char]| {
|
||||
matches!(
|
||||
tok.kind,
|
||||
TokenKind::Punctuation(
|
||||
Punctuation::Question | Punctuation::Comma | Punctuation::Period
|
||||
)
|
||||
)
|
||||
}),
|
||||
),
|
||||
Box::new(SequenceExpr::anything().then_kind_where(|kind| {
|
||||
matches!(
|
||||
kind,
|
||||
TokenKind::Punctuation(
|
||||
Punctuation::Question | Punctuation::Comma | Punctuation::Period
|
||||
)
|
||||
)
|
||||
})),
|
||||
]);
|
||||
|
||||
// (However, the message goes far beyond) every day things.
|
||||
|
|
@ -96,15 +86,14 @@ impl Default for Everyday {
|
|||
.then_punctuation(),
|
||||
),
|
||||
Box::new(
|
||||
SequenceExpr::default()
|
||||
SequenceExpr::anything()
|
||||
.t_any()
|
||||
.t_any()
|
||||
.t_any()
|
||||
.t_any()
|
||||
.t_any()
|
||||
.then(|tok: &Token, _src: &[char]| {
|
||||
.then_kind_where(|kind| {
|
||||
matches!(
|
||||
tok.kind,
|
||||
kind,
|
||||
TokenKind::Punctuation(
|
||||
Punctuation::Question | Punctuation::Comma | Punctuation::Period
|
||||
)
|
||||
|
|
|
|||
184
harper-core/src/linting/fascinated_by.rs
Normal file
184
harper-core/src/linting/fascinated_by.rs
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
use crate::{
|
||||
CharStringExt, Lint, Token,
|
||||
expr::{Expr, SequenceExpr},
|
||||
linting::{ExprLinter, LintKind, Suggestion, expr_linter::Chunk},
|
||||
};
|
||||
|
||||
/// Linter that checks the preposition following `fascinated`, offering `by`
/// and `with` when some other preposition (e.g. `about`) was used.
pub struct FascinatedBy {
    // Compiled pattern: `fascinated`, whitespace, then any preposition.
    expr: Box<dyn Expr>,
}
|
||||
|
||||
impl Default for FascinatedBy {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
expr: Box::new(SequenceExpr::aco("fascinated").t_ws().then_preposition()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprLinter for FascinatedBy {
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, toks: &[Token], src: &[char]) -> Option<Lint> {
|
||||
let prep_span = toks.last()?.span;
|
||||
let prep_chars = prep_span.get_content(src);
|
||||
if prep_chars.eq_any_ignore_ascii_case_str(&["by", "with"]) {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Lint {
|
||||
span: prep_span,
|
||||
lint_kind: LintKind::Usage,
|
||||
suggestions: vec![
|
||||
Suggestion::replace_with_match_case_str("by", prep_chars),
|
||||
Suggestion::replace_with_match_case_str("with", prep_chars),
|
||||
],
|
||||
message: "The correct prepositions to use with `fascinated` are `by` or `with`."
|
||||
.to_string(),
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Ensures the correct prepositions are used with `fascinated` (e.g., `fascinated by` or `fascinated with`)."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::linting::{fascinated_by::FascinatedBy, tests::assert_good_and_bad_suggestions};

    // Every case replaces `fascinated about` with both accepted prepositions:
    // `fascinated by` and `fascinated with`.

    #[test]
    fn fix_amiga() {
        assert_good_and_bad_suggestions(
            "Now, one aspect of the Amiga that I've always been fascinated about is making my own games for the Amiga.",
            FascinatedBy::default(),
            &[
                "Now, one aspect of the Amiga that I've always been fascinated by is making my own games for the Amiga.",
                "Now, one aspect of the Amiga that I've always been fascinated with is making my own games for the Amiga.",
            ][..],
            &[],
        );
    }

    #[test]
    fn fix_microbit() {
        assert_good_and_bad_suggestions(
            "also why I am very fascinated about the micro:bit itself",
            FascinatedBy::default(),
            &[
                "also why I am very fascinated by the micro:bit itself",
                "also why I am very fascinated with the micro:bit itself",
            ][..],
            &[],
        );
    }

    #[test]
    fn fix_software_development() {
        assert_good_and_bad_suggestions(
            "Self-learner, fascinated about software development, especially computer graphics and web - marcus-phi.",
            FascinatedBy::default(),
            &[
                "Self-learner, fascinated by software development, especially computer graphics and web - marcus-phi.",
                "Self-learner, fascinated with software development, especially computer graphics and web - marcus-phi.",
            ][..],
            &[],
        );
    }

    // Sentence-initial, capitalized `Fascinated`.
    #[test]
    fn fix_computer_science() {
        assert_good_and_bad_suggestions(
            "Fascinated about Computer Science, Finance and Statistics.",
            FascinatedBy::default(),
            &[
                "Fascinated by Computer Science, Finance and Statistics.",
                "Fascinated with Computer Science, Finance and Statistics.",
            ][..],
            &[],
        );
    }

    #[test]
    fn fix_possibilities() {
        assert_good_and_bad_suggestions(
            "m relatively new to deCONZ and Conbee2 but already very fascinated about the possibilities compared to Philips and Ikea's",
            FascinatedBy::default(),
            &[
                "m relatively new to deCONZ and Conbee2 but already very fascinated by the possibilities compared to Philips and Ikea's",
                "m relatively new to deCONZ and Conbee2 but already very fascinated with the possibilities compared to Philips and Ikea's",
            ][..],
            &[],
        );
    }

    #[test]
    fn fix_project() {
        assert_good_and_bad_suggestions(
            "I have been using browser use in local mode for a while and i am pretty fascinated about the project.",
            FascinatedBy::default(),
            &[
                "I have been using browser use in local mode for a while and i am pretty fascinated by the project.",
                "I have been using browser use in local mode for a while and i am pretty fascinated with the project.",
            ][..],
            &[],
        );
    }

    #[test]
    fn fix_work() {
        assert_good_and_bad_suggestions(
            "Hey guys, I am really fascinated about your work and I tried to build Magisk so I will be able to contribute for the project.",
            FascinatedBy::default(),
            &[
                "Hey guys, I am really fascinated by your work and I tried to build Magisk so I will be able to contribute for the project.",
                "Hey guys, I am really fascinated with your work and I tried to build Magisk so I will be able to contribute for the project.",
            ][..],
            &[],
        );
    }

    #[test]
    fn fix_ais() {
        assert_good_and_bad_suggestions(
            "I am a retired Dutch telecom engineer and fascinated about AIS applications.",
            FascinatedBy::default(),
            &[
                "I am a retired Dutch telecom engineer and fascinated by AIS applications.",
                "I am a retired Dutch telecom engineer and fascinated with AIS applications.",
            ][..],
            &[],
        );
    }

    #[test]
    fn fix_innovative_ideas() {
        assert_good_and_bad_suggestions(
            "Software Developer fascinated about innovative ideas, love to learn and share new technologies and ideas.",
            FascinatedBy::default(),
            &[
                "Software Developer fascinated by innovative ideas, love to learn and share new technologies and ideas.",
                "Software Developer fascinated with innovative ideas, love to learn and share new technologies and ideas.",
            ][..],
            &[],
        );
    }

    #[test]
    fn fix_coding() {
        assert_good_and_bad_suggestions(
            "m fascinated about coding and and sharing my code to the world.",
            FascinatedBy::default(),
            &[
                "m fascinated by coding and and sharing my code to the world.",
                "m fascinated with coding and and sharing my code to the world.",
            ][..],
            &[],
        );
    }
}
|
||||
160
harper-core/src/linting/handful.rs
Normal file
160
harper-core/src/linting/handful.rs
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
use crate::expr::{Expr, SequenceExpr, SpaceOrHyphen};
|
||||
use crate::linting::expr_linter::Chunk;
|
||||
use crate::{Token, TokenStringExt};
|
||||
|
||||
use super::{ExprLinter, Lint, LintKind, Suggestion};
|
||||
|
||||
/// Linter that rewrites the split quantity `hand full of` (spaced or
/// hyphenated) as the single word `handful`.
pub struct Handful {
    // Compiled pattern: `hand`, separator(s), `full`, separator(s), `of`.
    expr: Box<dyn Expr>,
}
|
||||
|
||||
impl Default for Handful {
|
||||
fn default() -> Self {
|
||||
let expr = SequenceExpr::default()
|
||||
.then_any_capitalization_of("hand")
|
||||
.then_one_or_more(SpaceOrHyphen)
|
||||
.then_any_capitalization_of("full")
|
||||
.then_one_or_more(SpaceOrHyphen)
|
||||
.then_any_capitalization_of("of");
|
||||
|
||||
Self {
|
||||
expr: Box::new(expr),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprLinter for Handful {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }

    /// Highlights `hand full` (excluding the trailing separator and `of`) and
    /// suggests replacing it with `handful`, casing matched to `hand`.
    fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
        // Need at least `hand` plus the final `of` to form a valid match.
        if matched_tokens.len() < 2 {
            return None;
        }

        // Walk back from the final `of`, skipping the separator tokens
        // (whitespace/hyphens) so the highlighted span ends right after `full`.
        let mut highlight_end = matched_tokens.len() - 1;
        while highlight_end > 0 {
            let prev = &matched_tokens[highlight_end - 1];
            if prev.kind.is_whitespace() || prev.kind.is_hyphen() {
                highlight_end -= 1;
            } else {
                break;
            }
        }

        // Degenerate match (separators only before `of`): nothing to replace.
        if highlight_end == 0 {
            return None;
        }

        let replacement = &matched_tokens[..highlight_end];
        let span = replacement.span()?;
        // Use the `hand` token as the casing template for `handful`.
        let template = matched_tokens.first()?.span.get_content(source);

        Some(Lint {
            span,
            lint_kind: LintKind::BoundaryError,
            suggestions: vec![Suggestion::replace_with_match_case(
                "handful".chars().collect(),
                template,
            )],
            message: "Write this quantity as the single word `handful`.".to_owned(),
            priority: 31,
        })
    }

    fn description(&self) -> &'static str {
        "Keeps the palm-sized quantity expressed by `handful` as one word."
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::Handful;
|
||||
use crate::linting::tests::{assert_lint_count, assert_no_lints, assert_suggestion_result};
|
||||
|
||||
#[test]
|
||||
fn suggests_plain_spacing() {
|
||||
assert_suggestion_result(
|
||||
"Her basket held a hand full of berries.",
|
||||
Handful::default(),
|
||||
"Her basket held a handful of berries.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn suggests_capitalized_form() {
|
||||
assert_suggestion_result(
|
||||
"Hand full of tales lined the shelf.",
|
||||
Handful::default(),
|
||||
"Handful of tales lined the shelf.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn suggests_hyphenated_form() {
|
||||
assert_suggestion_result(
|
||||
"A hand-full of marbles scattered across the floor.",
|
||||
Handful::default(),
|
||||
"A handful of marbles scattered across the floor.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn suggests_space_hyphen_combo() {
|
||||
assert_suggestion_result(
|
||||
"A hand - full of seeds spilled on the workbench.",
|
||||
Handful::default(),
|
||||
"A handful of seeds spilled on the workbench.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn suggests_initial_hyphen_variants() {
|
||||
assert_suggestion_result(
|
||||
"Hand-Full of furniture, the cart creaked slowly.",
|
||||
Handful::default(),
|
||||
"Handful of furniture, the cart creaked slowly.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flags_multiple_instances() {
|
||||
assert_lint_count(
|
||||
"She carried a hand full of carrots and a hand full of radishes.",
|
||||
Handful::default(),
|
||||
2,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allows_correct_handful() {
|
||||
assert_no_lints(
|
||||
"A handful of volunteers arrived in time.",
|
||||
Handful::default(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allows_parenthetical_hand() {
|
||||
assert_no_lints(
|
||||
"His hand, full of ink, kept writing without pause.",
|
||||
Handful::default(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allows_hand_is_full() {
|
||||
assert_no_lints("The hand is full of water.", Handful::default());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allows_handfull_typo() {
|
||||
assert_no_lints(
|
||||
"The word handfull is an incorrect spelling.",
|
||||
Handful::default(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -16,8 +16,7 @@ impl Default for HowTo {
|
|||
fn default() -> Self {
|
||||
let mut pattern = All::default();
|
||||
|
||||
let pos_pattern = SequenceExpr::default()
|
||||
.then_anything()
|
||||
let pos_pattern = SequenceExpr::anything()
|
||||
.then_anything()
|
||||
.t_aco("how")
|
||||
.then_whitespace()
|
||||
|
|
|
|||
|
|
@ -18,9 +18,7 @@ impl Default for IAmAgreement {
|
|||
.then(i_are.clone());
|
||||
|
||||
let non_and_word_before_i_are = SequenceExpr::default()
|
||||
.then(|tok: &Token, src: &[char]| {
|
||||
!tok.kind.is_word() || tok.span.get_content_string(src).to_lowercase() != "and"
|
||||
})
|
||||
.then_word_except(&["and"])
|
||||
.t_ws()
|
||||
.then(i_are);
|
||||
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ impl ExprLinter for InterestedIn {
|
|||
|
||||
Some(Lint {
|
||||
span: prep_span,
|
||||
lint_kind: LintKind::Grammar,
|
||||
lint_kind: LintKind::Usage,
|
||||
suggestions: vec![Suggestion::replace_with_match_case(
|
||||
"in".chars().collect(),
|
||||
prep_chars,
|
||||
|
|
|
|||
|
|
@ -15,23 +15,23 @@ impl Default for ItLooksLikeThat {
|
|||
SequenceExpr::default()
|
||||
.then_fixed_phrase("it looks like that")
|
||||
.then_whitespace()
|
||||
.then(|tok: &Token, _: &[char]| {
|
||||
.then_kind_where(|kind| {
|
||||
// Heuristics on the word after "that" which show "that" was used
|
||||
// as a relative pronoun, which is a mistake
|
||||
let is_subj = tok.kind.is_subject_pronoun();
|
||||
let is_ing = tok.kind.is_verb_progressive_form();
|
||||
let is_subj = kind.is_subject_pronoun();
|
||||
let is_ing = kind.is_verb_progressive_form();
|
||||
let is_definitely_rel_pron = is_subj || is_ing;
|
||||
|
||||
// Heuristics on the word after "that" which show "that"
|
||||
// could possibly be a legitimate demonstrative pronoun or determiner
|
||||
// as a demonstrative pronoun or a determiner
|
||||
// which would not be a mistake.
|
||||
let is_v3psgpres = tok.kind.is_verb_third_person_singular_present_form();
|
||||
let is_v3psgpres = kind.is_verb_third_person_singular_present_form();
|
||||
// NOTE: we don't have .is_modal_verb() but maybe we need it now!
|
||||
let is_vmodal_or_aux = tok.kind.is_auxiliary_verb();
|
||||
let is_vpret = tok.kind.is_verb_simple_past_form();
|
||||
let is_noun = tok.kind.is_noun();
|
||||
let is_oov = tok.kind.is_oov();
|
||||
let is_vmodal_or_aux = kind.is_auxiliary_verb();
|
||||
let is_vpret = kind.is_verb_simple_past_form();
|
||||
let is_noun = kind.is_noun();
|
||||
let is_oov = kind.is_oov();
|
||||
|
||||
let maybe_demonstrative_or_determiner =
|
||||
is_v3psgpres || is_vmodal_or_aux || is_vpret || is_noun || is_oov;
|
||||
|
|
|
|||
87
harper-core/src/linting/its_contraction/general.rs
Normal file
87
harper-core/src/linting/its_contraction/general.rs
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
use harper_brill::UPOS;
|
||||
|
||||
use crate::{
|
||||
Document, Token, TokenStringExt,
|
||||
expr::{All, Expr, ExprExt, OwnedExprExt, SequenceExpr},
|
||||
linting::{Lint, LintKind, Linter, Suggestion},
|
||||
patterns::{NominalPhrase, Pattern, UPOSSet, WordSet},
|
||||
};
|
||||
|
||||
pub struct General {
|
||||
expr: Box<dyn Expr>,
|
||||
}
|
||||
|
||||
impl Default for General {
|
||||
fn default() -> Self {
|
||||
let positive = SequenceExpr::default().t_aco("its").then_whitespace().then(
|
||||
UPOSSet::new(&[UPOS::VERB, UPOS::AUX, UPOS::DET, UPOS::PRON])
|
||||
.or(WordSet::new(&["because"])),
|
||||
);
|
||||
|
||||
let exceptions = SequenceExpr::anything()
|
||||
.then_anything()
|
||||
.then(WordSet::new(&["own", "intended"]));
|
||||
|
||||
let inverted = SequenceExpr::default().then_unless(exceptions);
|
||||
|
||||
let expr = All::new(vec![Box::new(positive), Box::new(inverted)]).or_longest(
|
||||
SequenceExpr::aco("its")
|
||||
.t_ws()
|
||||
.then(UPOSSet::new(&[UPOS::ADJ]))
|
||||
.t_ws()
|
||||
.then(UPOSSet::new(&[UPOS::SCONJ, UPOS::PART])),
|
||||
);
|
||||
|
||||
Self {
|
||||
expr: Box::new(expr),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Linter for General {
|
||||
fn lint(&mut self, document: &Document) -> Vec<Lint> {
|
||||
let mut lints = Vec::new();
|
||||
let source = document.get_source();
|
||||
|
||||
for chunk in document.iter_chunks() {
|
||||
lints.extend(
|
||||
self.expr
|
||||
.iter_matches(chunk, source)
|
||||
.filter_map(|match_span| {
|
||||
self.match_to_lint(&chunk[match_span.start..], source)
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
lints
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Detects the possessive `its` before `had`, `been`, or `got` and offers `it's` or `it has`."
|
||||
}
|
||||
}
|
||||
|
||||
impl General {
|
||||
fn match_to_lint(&self, toks: &[Token], source: &[char]) -> Option<Lint> {
|
||||
let offender = toks.first()?;
|
||||
let offender_chars = offender.span.get_content(source);
|
||||
|
||||
if toks.get(2)?.kind.is_upos(UPOS::VERB)
|
||||
&& NominalPhrase.matches(&toks[2..], source).is_some()
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Lint {
|
||||
span: offender.span,
|
||||
lint_kind: LintKind::Punctuation,
|
||||
suggestions: vec![
|
||||
Suggestion::replace_with_match_case_str("it's", offender_chars),
|
||||
Suggestion::replace_with_match_case_str("it has", offender_chars),
|
||||
],
|
||||
message: "Use `it's` (short for `it has` or `it is`) here, not the possessive `its`."
|
||||
.to_owned(),
|
||||
priority: 54,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -1,102 +1,15 @@
|
|||
use harper_brill::UPOS;
|
||||
use super::merge_linters::merge_linters;
|
||||
|
||||
use crate::Document;
|
||||
use crate::TokenStringExt;
|
||||
use crate::expr::All;
|
||||
use crate::expr::Expr;
|
||||
use crate::expr::ExprExt;
|
||||
use crate::expr::OwnedExprExt;
|
||||
use crate::expr::SequenceExpr;
|
||||
use crate::patterns::NominalPhrase;
|
||||
use crate::patterns::Pattern;
|
||||
use crate::patterns::UPOSSet;
|
||||
use crate::patterns::WordSet;
|
||||
use crate::{
|
||||
Token,
|
||||
linting::{Lint, LintKind, Suggestion},
|
||||
};
|
||||
mod general;
|
||||
mod proper_noun;
|
||||
|
||||
use super::Linter;
|
||||
use general::General;
|
||||
use proper_noun::ProperNoun;
|
||||
|
||||
pub struct ItsContraction {
|
||||
expr: Box<dyn Expr>,
|
||||
}
|
||||
|
||||
impl Default for ItsContraction {
|
||||
fn default() -> Self {
|
||||
let positive = SequenceExpr::default().t_aco("its").then_whitespace().then(
|
||||
UPOSSet::new(&[UPOS::VERB, UPOS::AUX, UPOS::DET, UPOS::PRON])
|
||||
.or(WordSet::new(&["because"])),
|
||||
);
|
||||
|
||||
let exceptions = SequenceExpr::default()
|
||||
.then_anything()
|
||||
.then_anything()
|
||||
.then(WordSet::new(&["own", "intended"]));
|
||||
|
||||
let inverted = SequenceExpr::default().then_unless(exceptions);
|
||||
|
||||
let expr = All::new(vec![Box::new(positive), Box::new(inverted)]).or_longest(
|
||||
SequenceExpr::aco("its")
|
||||
.t_ws()
|
||||
.then(UPOSSet::new(&[UPOS::ADJ]))
|
||||
.t_ws()
|
||||
.then(UPOSSet::new(&[UPOS::SCONJ, UPOS::PART])),
|
||||
);
|
||||
|
||||
Self {
|
||||
expr: Box::new(expr),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Linter for ItsContraction {
|
||||
fn lint(&mut self, document: &Document) -> Vec<Lint> {
|
||||
let mut lints = Vec::new();
|
||||
let source = document.get_source();
|
||||
|
||||
for chunk in document.iter_chunks() {
|
||||
lints.extend(
|
||||
self.expr
|
||||
.iter_matches(chunk, source)
|
||||
.filter_map(|match_span| {
|
||||
self.match_to_lint(&chunk[match_span.start..], source)
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
lints
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Detects the possessive `its` before `had`, `been`, or `got` and offers `it's` or `it has`."
|
||||
}
|
||||
}
|
||||
|
||||
impl ItsContraction {
|
||||
fn match_to_lint(&self, toks: &[Token], source: &[char]) -> Option<Lint> {
|
||||
let offender = toks.first()?;
|
||||
let offender_chars = offender.span.get_content(source);
|
||||
|
||||
if toks.get(2)?.kind.is_upos(UPOS::VERB)
|
||||
&& NominalPhrase.matches(&toks[2..], source).is_some()
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Lint {
|
||||
span: offender.span,
|
||||
lint_kind: LintKind::WordChoice,
|
||||
suggestions: vec![
|
||||
Suggestion::replace_with_match_case_str("it's", offender_chars),
|
||||
Suggestion::replace_with_match_case_str("it has", offender_chars),
|
||||
],
|
||||
message: "Use `it's` (short for `it has` or `it is`) here, not the possessive `its`."
|
||||
.to_owned(),
|
||||
priority: 54,
|
||||
})
|
||||
}
|
||||
}
|
||||
merge_linters!(
|
||||
ItsContraction => General, ProperNoun =>
|
||||
"Detects places where the possessive `its` should be the contraction `it's`, including before verbs/clauses and before proper nouns after opinion verbs."
|
||||
);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
|
@ -284,4 +197,90 @@ mod tests {
|
|||
ItsContraction::default(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn corrects_think_google() {
|
||||
assert_suggestion_result(
|
||||
"I think its Google, not Microsoft.",
|
||||
ItsContraction::default(),
|
||||
"I think it's Google, not Microsoft.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn corrects_hope_katie() {
|
||||
assert_suggestion_result(
|
||||
"I hope its Katie.",
|
||||
ItsContraction::default(),
|
||||
"I hope it's Katie.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn corrects_guess_date() {
|
||||
assert_suggestion_result(
|
||||
"I guess its March 6.",
|
||||
ItsContraction::default(),
|
||||
"I guess it's March 6.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn corrects_assume_john() {
|
||||
assert_suggestion_result(
|
||||
"We assume its John.",
|
||||
ItsContraction::default(),
|
||||
"We assume it's John.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn corrects_doubt_tesla() {
|
||||
assert_suggestion_result(
|
||||
"They doubt its Tesla this year.",
|
||||
ItsContraction::default(),
|
||||
"They doubt it's Tesla this year.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_two_word_name() {
|
||||
assert_suggestion_result(
|
||||
"She thinks its New York.",
|
||||
ItsContraction::default(),
|
||||
"She thinks it's New York.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignores_existing_contraction() {
|
||||
assert_lint_count("I think it's Google.", ItsContraction::default(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignores_possessive_noun_after_name() {
|
||||
assert_lint_count(
|
||||
"I think its Google product launch.",
|
||||
ItsContraction::default(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignores_without_opinion_verb() {
|
||||
assert_lint_count(
|
||||
"Its Google Pixel lineup is impressive.",
|
||||
ItsContraction::default(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignores_common_noun_target() {
|
||||
assert_lint_count(
|
||||
"We hope its accuracy improves.",
|
||||
ItsContraction::default(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
}
|
||||
121
harper-core/src/linting/its_contraction/proper_noun.rs
Normal file
121
harper-core/src/linting/its_contraction/proper_noun.rs
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
use harper_brill::UPOS;
|
||||
|
||||
use crate::{
|
||||
Document, Token, TokenStringExt,
|
||||
expr::{Expr, ExprExt, ExprMap, OwnedExprExt, SequenceExpr},
|
||||
linting::{Lint, LintKind, Linter, Suggestion},
|
||||
patterns::{DerivedFrom, UPOSSet},
|
||||
};
|
||||
|
||||
pub struct ProperNoun {
|
||||
expr: Box<dyn Expr>,
|
||||
map: Arc<ExprMap<Range<usize>>>,
|
||||
}
|
||||
|
||||
impl Default for ProperNoun {
|
||||
fn default() -> Self {
|
||||
let mut map = ExprMap::default();
|
||||
|
||||
let opinion_verbs = DerivedFrom::new_from_str("think")
|
||||
.or(DerivedFrom::new_from_str("hope"))
|
||||
.or(DerivedFrom::new_from_str("assume"))
|
||||
.or(DerivedFrom::new_from_str("doubt"))
|
||||
.or(DerivedFrom::new_from_str("guess"));
|
||||
|
||||
let capitalized_word = |tok: &Token, src: &[char]| {
|
||||
tok.kind.is_word()
|
||||
&& tok
|
||||
.span
|
||||
.get_content(src)
|
||||
.first()
|
||||
.map(|c| c.is_uppercase())
|
||||
.unwrap_or(false)
|
||||
};
|
||||
|
||||
let name_head = UPOSSet::new(&[UPOS::PROPN]).or(capitalized_word);
|
||||
|
||||
let lookahead_word = SequenceExpr::default().t_ws().then_any_word();
|
||||
|
||||
map.insert(
|
||||
SequenceExpr::default()
|
||||
.then(opinion_verbs)
|
||||
.t_ws()
|
||||
.t_aco("its")
|
||||
.t_ws()
|
||||
.then(name_head)
|
||||
.then_optional(lookahead_word),
|
||||
2..3,
|
||||
);
|
||||
|
||||
let map = Arc::new(map);
|
||||
|
||||
Self {
|
||||
expr: Box::new(map.clone()),
|
||||
map,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Linter for ProperNoun {
|
||||
fn lint(&mut self, document: &Document) -> Vec<Lint> {
|
||||
let mut lints = Vec::new();
|
||||
let source = document.get_source();
|
||||
|
||||
for chunk in document.iter_chunks() {
|
||||
lints.extend(
|
||||
self.expr
|
||||
.iter_matches(chunk, source)
|
||||
.filter_map(|match_span| {
|
||||
let matched = &chunk[match_span.start..match_span.end];
|
||||
self.match_to_lint(matched, source)
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
lints
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Suggests the contraction `it's` after opinion verbs when it introduces a proper noun."
|
||||
}
|
||||
}
|
||||
|
||||
impl ProperNoun {
|
||||
fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
|
||||
if matched_tokens.len() >= 7
|
||||
&& let Some(next_word) = matched_tokens.get(6)
|
||||
{
|
||||
let is_lowercase = next_word
|
||||
.span
|
||||
.get_content(source)
|
||||
.first()
|
||||
.map(|c| c.is_lowercase())
|
||||
.unwrap_or(false);
|
||||
|
||||
if is_lowercase
|
||||
&& (next_word.kind.is_upos(UPOS::NOUN) || next_word.kind.is_upos(UPOS::ADJ))
|
||||
{
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
let range = self.map.lookup(0, matched_tokens, source)?.clone();
|
||||
let offending = matched_tokens.get(range.start)?;
|
||||
let offender_text = offending.span.get_content(source);
|
||||
|
||||
Some(Lint {
|
||||
span: offending.span,
|
||||
lint_kind: LintKind::Punctuation,
|
||||
suggestions: vec![Suggestion::replace_with_match_case_str(
|
||||
"it's",
|
||||
offender_text,
|
||||
)],
|
||||
message: "Use `it's` (short for \"it is\") before a proper noun in this construction."
|
||||
.to_owned(),
|
||||
priority: 31,
|
||||
})
|
||||
}
|
||||
}
|
||||
164
harper-core/src/linting/jealous_of.rs
Normal file
164
harper-core/src/linting/jealous_of.rs
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
use crate::{
|
||||
Token,
|
||||
expr::{Expr, SequenceExpr},
|
||||
linting::expr_linter::Chunk,
|
||||
linting::{ExprLinter, Lint, LintKind, Suggestion},
|
||||
};
|
||||
|
||||
pub struct JealousOf {
|
||||
expr: Box<dyn Expr>,
|
||||
}
|
||||
|
||||
impl Default for JealousOf {
|
||||
fn default() -> Self {
|
||||
let valid_object = |tok: &Token, _source: &[char]| {
|
||||
(tok.kind.is_nominal() || !tok.kind.is_verb())
|
||||
&& (tok.kind.is_oov() || tok.kind.is_nominal())
|
||||
&& !tok.kind.is_preposition()
|
||||
};
|
||||
|
||||
let pattern = SequenceExpr::default()
|
||||
.t_aco("jealous")
|
||||
.t_ws()
|
||||
.t_aco("from")
|
||||
.t_ws()
|
||||
.then_optional(SequenceExpr::default().then_determiner().t_ws())
|
||||
.then(valid_object);
|
||||
|
||||
Self {
|
||||
expr: Box::new(pattern),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprLinter for JealousOf {
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, tokens: &[Token], source: &[char]) -> Option<Lint> {
|
||||
let from_token = &tokens[2];
|
||||
|
||||
Some(Lint {
|
||||
span: from_token.span,
|
||||
lint_kind: LintKind::Usage,
|
||||
suggestions: vec![Suggestion::replace_with_match_case_str(
|
||||
"of",
|
||||
from_token.span.get_content(source),
|
||||
)],
|
||||
message: "Use `of` after `jealous`.".to_owned(),
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Encourages the standard preposition after `jealous`."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::JealousOf;
|
||||
use crate::linting::tests::{assert_lint_count, assert_suggestion_result};
|
||||
|
||||
#[test]
|
||||
fn replaces_basic_from() {
|
||||
assert_suggestion_result(
|
||||
"She was jealous from her sister's success.",
|
||||
JealousOf::default(),
|
||||
"She was jealous of her sister's success.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_optional_determiner() {
|
||||
assert_suggestion_result(
|
||||
"He grew jealous from the attention.",
|
||||
JealousOf::default(),
|
||||
"He grew jealous of the attention.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fixes_pronoun_object() {
|
||||
assert_suggestion_result(
|
||||
"They became jealous from him.",
|
||||
JealousOf::default(),
|
||||
"They became jealous of him.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allows_oov_target() {
|
||||
assert_suggestion_result(
|
||||
"I'm jealous from Zybrix.",
|
||||
JealousOf::default(),
|
||||
"I'm jealous of Zybrix.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn corrects_uppercase_preposition() {
|
||||
assert_suggestion_result(
|
||||
"Jealous FROM his fame.",
|
||||
JealousOf::default(),
|
||||
"Jealous OF his fame.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fixes_longer_phrase() {
|
||||
assert_suggestion_result(
|
||||
"They felt jealous from the sudden praise she received.",
|
||||
JealousOf::default(),
|
||||
"They felt jealous of the sudden praise she received.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fixes_minimal_phrase() {
|
||||
assert_suggestion_result(
|
||||
"jealous from success",
|
||||
JealousOf::default(),
|
||||
"jealous of success",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn does_not_flag_correct_usage() {
|
||||
assert_lint_count(
|
||||
"She was jealous of her sister's success.",
|
||||
JealousOf::default(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn does_not_flag_other_preposition_sequence() {
|
||||
assert_lint_count(
|
||||
"They stayed jealous from within the fortress.",
|
||||
JealousOf::default(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fixes_following_gerund() {
|
||||
assert_suggestion_result(
|
||||
"He was jealous from being ignored.",
|
||||
JealousOf::default(),
|
||||
"He was jealous of being ignored.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignores_numbers_after_from() {
|
||||
assert_lint_count(
|
||||
"She remained jealous from 2010 through 2015.",
|
||||
JealousOf::default(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
}
|
||||
149
harper-core/src/linting/johns_hopkins.rs
Normal file
149
harper-core/src/linting/johns_hopkins.rs
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
use crate::{
|
||||
CharStringExt, Token,
|
||||
expr::{Expr, SequenceExpr},
|
||||
linting::expr_linter::Chunk,
|
||||
linting::{ExprLinter, Lint, LintKind, Suggestion},
|
||||
};
|
||||
|
||||
pub struct JohnsHopkins {
|
||||
expr: Box<dyn Expr>,
|
||||
}
|
||||
|
||||
impl Default for JohnsHopkins {
|
||||
fn default() -> Self {
|
||||
let expr = SequenceExpr::default()
|
||||
.then(|tok: &Token, src: &[char]| {
|
||||
tok.kind.is_proper_noun()
|
||||
&& tok.span.get_content(src).eq_ignore_ascii_case_str("john")
|
||||
})
|
||||
.t_ws()
|
||||
.then(|tok: &Token, src: &[char]| {
|
||||
tok.kind.is_proper_noun()
|
||||
&& tok
|
||||
.span
|
||||
.get_content(src)
|
||||
.eq_ignore_ascii_case_str("hopkins")
|
||||
});
|
||||
|
||||
Self {
|
||||
expr: Box::new(expr),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprLinter for JohnsHopkins {
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
|
||||
let span = matched_tokens.first()?.span;
|
||||
let template = span.get_content(source);
|
||||
|
||||
Some(Lint {
|
||||
span,
|
||||
lint_kind: LintKind::WordChoice,
|
||||
suggestions: vec![Suggestion::replace_with_match_case_str("Johns", template)],
|
||||
message: "Use `Johns Hopkins` for this name.".to_owned(),
|
||||
priority: 31,
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Recommends the proper spelling `Johns Hopkins`."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::JohnsHopkins;
|
||||
use crate::linting::tests::{assert_lint_count, assert_suggestion_result};
|
||||
|
||||
#[test]
|
||||
fn corrects_university_reference() {
|
||||
assert_suggestion_result(
|
||||
"I applied to John Hopkins University last fall.",
|
||||
JohnsHopkins::default(),
|
||||
"I applied to Johns Hopkins University last fall.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn corrects_hospital_reference() {
|
||||
assert_suggestion_result(
|
||||
"She works at the John Hopkins hospital.",
|
||||
JohnsHopkins::default(),
|
||||
"She works at the Johns Hopkins hospital.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn corrects_standalone_name() {
|
||||
assert_suggestion_result(
|
||||
"We toured John Hopkins yesterday.",
|
||||
JohnsHopkins::default(),
|
||||
"We toured Johns Hopkins yesterday.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn corrects_lowercase_usage() {
|
||||
assert_suggestion_result(
|
||||
"I studied at john hopkins online.",
|
||||
JohnsHopkins::default(),
|
||||
"I studied at johns hopkins online.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn corrects_across_newline_whitespace() {
|
||||
assert_suggestion_result(
|
||||
"We met at John\nHopkins for lunch.",
|
||||
JohnsHopkins::default(),
|
||||
"We met at Johns\nHopkins for lunch.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn corrects_with_trailing_punctuation() {
|
||||
assert_suggestion_result(
|
||||
"I toured John Hopkins, and it was great.",
|
||||
JohnsHopkins::default(),
|
||||
"I toured Johns Hopkins, and it was great.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn corrects_before_hyphenated_unit() {
|
||||
assert_suggestion_result(
|
||||
"She joined the John Hopkins-affiliated lab.",
|
||||
JohnsHopkins::default(),
|
||||
"She joined the Johns Hopkins-affiliated lab.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allows_correct_spelling() {
|
||||
assert_lint_count(
|
||||
"Johns Hopkins University has a great program.",
|
||||
JohnsHopkins::default(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allows_apostrophized_form() {
|
||||
assert_lint_count(
|
||||
"John Hopkins's novel won awards.",
|
||||
JohnsHopkins::default(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allows_reversed_name_order() {
|
||||
assert_lint_count("Hopkins, John is a contact.", JohnsHopkins::default(), 0);
|
||||
}
|
||||
}
|
||||
|
|
@ -27,13 +27,14 @@ impl Default for NoContractionWithVerb {
|
|||
.then(WordSet::new(&["lets", "let"]))
|
||||
.then_whitespace();
|
||||
|
||||
// Match verbs that are only verbs (not also nouns/adjectives) and not in -ing form
|
||||
let non_ing_verb = SequenceExpr::default().then(|tok: &Token, _src: &[char]| {
|
||||
tok.kind.is_verb()
|
||||
&& !tok.kind.is_noun()
|
||||
&& !tok.kind.is_adjective()
|
||||
&& !tok.kind.is_verb_progressive_form()
|
||||
});
|
||||
let non_ing_verb = SequenceExpr::default().then_kind_is_but_isnt_any_of(
|
||||
TokenKind::is_verb,
|
||||
&[
|
||||
TokenKind::is_noun,
|
||||
TokenKind::is_adjective,
|
||||
TokenKind::is_verb_progressive_form,
|
||||
] as &[_],
|
||||
);
|
||||
|
||||
// Ambiguous word is a verb determined by heuristic of following word's part of speech
|
||||
// Tests the next two words after "let".
|
||||
|
|
|
|||
|
|
@ -16,8 +16,7 @@ impl Default for Likewise {
|
|||
expr.add(SequenceExpr::aco("like").then_whitespace().t_aco("wise"));
|
||||
expr.add(
|
||||
SequenceExpr::default().then_unless(
|
||||
SequenceExpr::default()
|
||||
.then_anything()
|
||||
SequenceExpr::anything()
|
||||
.then_whitespace()
|
||||
.then_anything()
|
||||
.then_whitespace()
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ use super::and_in::AndIn;
|
|||
use super::and_the_like::AndTheLike;
|
||||
use super::another_thing_coming::AnotherThingComing;
|
||||
use super::another_think_coming::AnotherThinkComing;
|
||||
use super::apart_from::ApartFrom;
|
||||
use super::ask_no_preposition::AskNoPreposition;
|
||||
use super::avoid_curses::AvoidCurses;
|
||||
use super::back_in_the_day::BackInTheDay;
|
||||
|
|
@ -33,6 +34,7 @@ use super::be_allowed::BeAllowed;
|
|||
use super::best_of_all_time::BestOfAllTime;
|
||||
use super::boring_words::BoringWords;
|
||||
use super::bought::Bought;
|
||||
use super::brand_brandish::BrandBrandish;
|
||||
use super::cant::Cant;
|
||||
use super::capitalize_personal_pronouns::CapitalizePersonalPronouns;
|
||||
use super::cautionary_tale::CautionaryTale;
|
||||
|
|
@ -44,20 +46,24 @@ use super::compound_subject_i::CompoundSubjectI;
|
|||
use super::confident::Confident;
|
||||
use super::correct_number_suffix::CorrectNumberSuffix;
|
||||
use super::criteria_phenomena::CriteriaPhenomena;
|
||||
use super::cure_for::CureFor;
|
||||
use super::currency_placement::CurrencyPlacement;
|
||||
use super::despite_of::DespiteOf;
|
||||
use super::didnt::Didnt;
|
||||
use super::discourse_markers::DiscourseMarkers;
|
||||
use super::disjoint_prefixes::DisjointPrefixes;
|
||||
use super::dot_initialisms::DotInitialisms;
|
||||
use super::double_click::DoubleClick;
|
||||
use super::double_modal::DoubleModal;
|
||||
use super::ellipsis_length::EllipsisLength;
|
||||
use super::else_possessive::ElsePossessive;
|
||||
use super::ever_every::EverEvery;
|
||||
use super::everyday::Everyday;
|
||||
use super::expand_memory_shorthands::ExpandMemoryShorthands;
|
||||
use super::expand_time_shorthands::ExpandTimeShorthands;
|
||||
use super::expr_linter::run_on_chunk;
|
||||
use super::far_be_it::FarBeIt;
|
||||
use super::fascinated_by::FascinatedBy;
|
||||
use super::feel_fell::FeelFell;
|
||||
use super::few_units_of_time_ago::FewUnitsOfTimeAgo;
|
||||
use super::filler_words::FillerWords;
|
||||
|
|
@ -83,6 +89,8 @@ use super::interested_in::InterestedIn;
|
|||
use super::it_looks_like_that::ItLooksLikeThat;
|
||||
use super::its_contraction::ItsContraction;
|
||||
use super::its_possessive::ItsPossessive;
|
||||
use super::jealous_of::JealousOf;
|
||||
use super::johns_hopkins::JohnsHopkins;
|
||||
use super::left_right_hand::LeftRightHand;
|
||||
use super::less_worse::LessWorse;
|
||||
use super::let_to_do::LetToDo;
|
||||
|
|
@ -90,12 +98,13 @@ use super::lets_confusion::LetsConfusion;
|
|||
use super::likewise::Likewise;
|
||||
use super::long_sentences::LongSentences;
|
||||
use super::looking_forward_to::LookingForwardTo;
|
||||
use super::mass_plurals::MassPlurals;
|
||||
use super::mass_nouns::MassNouns;
|
||||
use super::merge_words::MergeWords;
|
||||
use super::missing_preposition::MissingPreposition;
|
||||
use super::missing_to::MissingTo;
|
||||
use super::misspell::Misspell;
|
||||
use super::mixed_bag::MixedBag;
|
||||
use super::modal_be_adjective::ModalBeAdjective;
|
||||
use super::modal_of::ModalOf;
|
||||
use super::modal_seem::ModalSeem;
|
||||
use super::months::Months;
|
||||
|
|
@ -110,10 +119,10 @@ use super::no_match_for::NoMatchFor;
|
|||
use super::no_oxford_comma::NoOxfordComma;
|
||||
use super::nobody::Nobody;
|
||||
use super::nominal_wants::NominalWants;
|
||||
use super::noun_countability::NounCountability;
|
||||
use super::noun_verb_confusion::NounVerbConfusion;
|
||||
use super::number_suffix_capitalization::NumberSuffixCapitalization;
|
||||
use super::of_course::OfCourse;
|
||||
use super::oldest_in_the_book::OldestInTheBook;
|
||||
use super::on_floor::OnFloor;
|
||||
use super::once_or_twice::OnceOrTwice;
|
||||
use super::one_and_the_same::OneAndTheSame;
|
||||
|
|
@ -137,9 +146,12 @@ use super::quantifier_needs_of::QuantifierNeedsOf;
|
|||
use super::quantifier_numeral_conflict::QuantifierNumeralConflict;
|
||||
use super::quite_quiet::QuiteQuiet;
|
||||
use super::quote_spacing::QuoteSpacing;
|
||||
use super::redundant_acronyms::RedundantAcronyms;
|
||||
use super::redundant_additive_adverbs::RedundantAdditiveAdverbs;
|
||||
use super::regionalisms::Regionalisms;
|
||||
use super::repeated_words::RepeatedWords;
|
||||
use super::respond::Respond;
|
||||
use super::right_click::RightClick;
|
||||
use super::roller_skated::RollerSkated;
|
||||
use super::safe_to_save::SafeToSave;
|
||||
use super::save_to_safe::SaveToSafe;
|
||||
|
|
@ -152,12 +164,14 @@ use super::single_be::SingleBe;
|
|||
use super::some_without_article::SomeWithoutArticle;
|
||||
use super::something_is::SomethingIs;
|
||||
use super::somewhat_something::SomewhatSomething;
|
||||
use super::soon_to_be::SoonToBe;
|
||||
use super::sought_after::SoughtAfter;
|
||||
use super::spaces::Spaces;
|
||||
use super::spell_check::SpellCheck;
|
||||
use super::spelled_numbers::SpelledNumbers;
|
||||
use super::split_words::SplitWords;
|
||||
use super::subject_pronoun::SubjectPronoun;
|
||||
use super::take_medicine::TakeMedicine;
|
||||
use super::that_than::ThatThan;
|
||||
use super::that_which::ThatWhich;
|
||||
use super::the_how_why::TheHowWhy;
|
||||
|
|
@ -172,9 +186,11 @@ use super::throw_rubbish::ThrowRubbish;
|
|||
use super::to_adverb::ToAdverb;
|
||||
use super::to_two_too::ToTwoToo;
|
||||
use super::touristic::Touristic;
|
||||
use super::transposed_space::TransposedSpace;
|
||||
use super::unclosed_quotes::UnclosedQuotes;
|
||||
use super::update_place_names::UpdatePlaceNames;
|
||||
use super::use_genitive::UseGenitive;
|
||||
use super::use_title_case::UseTitleCase;
|
||||
use super::verb_to_adjective::VerbToAdjective;
|
||||
use super::very_unique::VeryUnique;
|
||||
use super::vice_versa::ViceVersa;
|
||||
|
|
@ -184,6 +200,7 @@ use super::well_educated::WellEducated;
|
|||
use super::whereas::Whereas;
|
||||
use super::widely_accepted::WidelyAccepted;
|
||||
use super::win_prize::WinPrize;
|
||||
use super::wish_could::WishCould;
|
||||
use super::wordpress_dotcom::WordPressDotcom;
|
||||
use super::would_never_have::WouldNeverHave;
|
||||
use super::{ExprLinter, Lint};
|
||||
|
|
@ -467,12 +484,12 @@ impl LintGroup {
|
|||
));
|
||||
out.merge_from(&mut closed_compounds::lint_group());
|
||||
out.merge_from(&mut initialisms::lint_group());
|
||||
// out.merge_from(&mut update_place_names::lint_group());
|
||||
|
||||
// Add all the more complex rules to the group.
|
||||
// Please maintain alphabetical order.
|
||||
// On *nix you can maintain sort order with `sort -t'(' -k2`
|
||||
insert_expr_rule!(APart, true);
|
||||
insert_expr_rule!(AWhile, true);
|
||||
insert_expr_rule!(Addicting, true);
|
||||
insert_expr_rule!(AdjectiveDoubleDegree, true);
|
||||
insert_struct_rule!(AdjectiveOfA, true);
|
||||
|
|
@ -486,6 +503,7 @@ impl LintGroup {
|
|||
insert_expr_rule!(AndTheLike, true);
|
||||
insert_expr_rule!(AnotherThingComing, true);
|
||||
insert_expr_rule!(AnotherThinkComing, false);
|
||||
insert_expr_rule!(ApartFrom, true);
|
||||
insert_expr_rule!(AskNoPreposition, true);
|
||||
insert_expr_rule!(AvoidCurses, true);
|
||||
insert_expr_rule!(BackInTheDay, true);
|
||||
|
|
@ -493,20 +511,19 @@ impl LintGroup {
|
|||
insert_expr_rule!(BestOfAllTime, true);
|
||||
insert_expr_rule!(BoringWords, false);
|
||||
insert_expr_rule!(Bought, true);
|
||||
insert_expr_rule!(BrandBrandish, true);
|
||||
insert_expr_rule!(Cant, true);
|
||||
insert_struct_rule!(CapitalizePersonalPronouns, true);
|
||||
insert_expr_rule!(CautionaryTale, true);
|
||||
insert_expr_rule!(ChangeTack, true);
|
||||
insert_expr_rule!(ChockFull, true);
|
||||
insert_expr_rule!(AWhile, true);
|
||||
insert_struct_rule!(SubjectPronoun, true);
|
||||
insert_struct_rule!(FindFine, true);
|
||||
insert_struct_rule!(CommaFixes, true);
|
||||
insert_struct_rule!(CompoundNouns, true);
|
||||
insert_expr_rule!(CompoundSubjectI, true);
|
||||
insert_expr_rule!(Confident, true);
|
||||
insert_struct_rule!(CorrectNumberSuffix, true);
|
||||
insert_expr_rule!(CriteriaPhenomena, true);
|
||||
insert_expr_rule!(CureFor, true);
|
||||
insert_struct_rule!(CurrencyPlacement, true);
|
||||
insert_expr_rule!(Dashes, true);
|
||||
insert_expr_rule!(DespiteOf, true);
|
||||
|
|
@ -517,13 +534,16 @@ impl LintGroup {
|
|||
insert_expr_rule!(DoubleModal, true);
|
||||
insert_struct_rule!(EllipsisLength, true);
|
||||
insert_expr_rule!(ElsePossessive, true);
|
||||
insert_expr_rule!(EverEvery, true);
|
||||
insert_expr_rule!(Everyday, true);
|
||||
insert_expr_rule!(ExpandMemoryShorthands, true);
|
||||
insert_expr_rule!(ExpandTimeShorthands, true);
|
||||
insert_expr_rule!(FarBeIt, true);
|
||||
insert_expr_rule!(FascinatedBy, true);
|
||||
insert_expr_rule!(FeelFell, true);
|
||||
insert_expr_rule!(FewUnitsOfTimeAgo, true);
|
||||
insert_expr_rule!(FillerWords, true);
|
||||
insert_struct_rule!(FindFine, true);
|
||||
insert_expr_rule!(FirstAidKit, true);
|
||||
insert_expr_rule!(ForNoun, true);
|
||||
insert_expr_rule!(FreePredicate, true);
|
||||
|
|
@ -542,6 +562,8 @@ impl LintGroup {
|
|||
insert_expr_rule!(ItLooksLikeThat, true);
|
||||
insert_struct_rule!(ItsContraction, true);
|
||||
insert_expr_rule!(ItsPossessive, true);
|
||||
insert_expr_rule!(JealousOf, true);
|
||||
insert_expr_rule!(JohnsHopkins, true);
|
||||
insert_expr_rule!(LeftRightHand, true);
|
||||
insert_expr_rule!(LessWorse, true);
|
||||
insert_expr_rule!(LetToDo, true);
|
||||
|
|
@ -554,6 +576,7 @@ impl LintGroup {
|
|||
insert_expr_rule!(MissingTo, true);
|
||||
insert_expr_rule!(Misspell, true);
|
||||
insert_expr_rule!(MixedBag, true);
|
||||
insert_expr_rule!(ModalBeAdjective, true);
|
||||
insert_expr_rule!(ModalOf, true);
|
||||
insert_expr_rule!(ModalSeem, true);
|
||||
insert_expr_rule!(Months, true);
|
||||
|
|
@ -568,10 +591,10 @@ impl LintGroup {
|
|||
insert_struct_rule!(NoOxfordComma, false);
|
||||
insert_expr_rule!(Nobody, true);
|
||||
insert_expr_rule!(NominalWants, true);
|
||||
insert_expr_rule!(NounCountability, true);
|
||||
insert_struct_rule!(NounVerbConfusion, true);
|
||||
insert_struct_rule!(NumberSuffixCapitalization, true);
|
||||
insert_expr_rule!(OfCourse, true);
|
||||
insert_expr_rule!(OldestInTheBook, true);
|
||||
insert_expr_rule!(OnFloor, true);
|
||||
insert_expr_rule!(OnceOrTwice, true);
|
||||
insert_expr_rule!(OneAndTheSame, true);
|
||||
|
|
@ -594,8 +617,11 @@ impl LintGroup {
|
|||
insert_expr_rule!(QuantifierNumeralConflict, true);
|
||||
insert_expr_rule!(QuiteQuiet, true);
|
||||
insert_struct_rule!(QuoteSpacing, true);
|
||||
insert_expr_rule!(RedundantAcronyms, true);
|
||||
insert_expr_rule!(RedundantAdditiveAdverbs, true);
|
||||
insert_struct_rule!(RepeatedWords, true);
|
||||
insert_expr_rule!(Respond, true);
|
||||
insert_expr_rule!(RightClick, true);
|
||||
insert_expr_rule!(RollerSkated, true);
|
||||
insert_expr_rule!(SafeToSave, true);
|
||||
insert_expr_rule!(SaveToSafe, true);
|
||||
|
|
@ -607,10 +633,13 @@ impl LintGroup {
|
|||
insert_expr_rule!(SomeWithoutArticle, true);
|
||||
insert_expr_rule!(SomethingIs, true);
|
||||
insert_expr_rule!(SomewhatSomething, true);
|
||||
insert_expr_rule!(SoonToBe, true);
|
||||
insert_expr_rule!(SoughtAfter, true);
|
||||
insert_struct_rule!(Spaces, true);
|
||||
insert_struct_rule!(SpelledNumbers, false);
|
||||
insert_expr_rule!(SplitWords, true);
|
||||
insert_struct_rule!(SubjectPronoun, true);
|
||||
insert_expr_rule!(TakeMedicine, true);
|
||||
insert_expr_rule!(ThatThan, true);
|
||||
insert_expr_rule!(ThatWhich, true);
|
||||
insert_expr_rule!(TheHowWhy, true);
|
||||
|
|
@ -637,6 +666,7 @@ impl LintGroup {
|
|||
insert_expr_rule!(Whereas, true);
|
||||
insert_expr_rule!(WidelyAccepted, true);
|
||||
insert_expr_rule!(WinPrize, true);
|
||||
insert_expr_rule!(WishCould, true);
|
||||
insert_struct_rule!(WordPressDotcom, true);
|
||||
insert_expr_rule!(WouldNeverHave, true);
|
||||
|
||||
|
|
@ -667,8 +697,20 @@ impl LintGroup {
|
|||
out.add("HaveTakeALook", HaveTakeALook::new(dialect));
|
||||
out.config.set_rule_enabled("HaveTakeALook", true);
|
||||
|
||||
out.add("MassPlurals", MassPlurals::new(dictionary.clone()));
|
||||
out.config.set_rule_enabled("MassPlurals", true);
|
||||
out.add("MassNouns", MassNouns::new(dictionary.clone()));
|
||||
out.config.set_rule_enabled("MassNouns", true);
|
||||
|
||||
out.add("UseTitleCase", UseTitleCase::new(dictionary.clone()));
|
||||
out.config.set_rule_enabled("UseTitleCase", true);
|
||||
|
||||
out.add_chunk_expr_linter(
|
||||
"DisjointPrefixes",
|
||||
DisjointPrefixes::new(dictionary.clone()),
|
||||
);
|
||||
out.config.set_rule_enabled("DisjointPrefixes", true);
|
||||
|
||||
out.add_chunk_expr_linter("TransposedSpace", TransposedSpace::new(dictionary.clone()));
|
||||
out.config.set_rule_enabled("TransposedSpace", true);
|
||||
|
||||
out
|
||||
}
|
||||
|
|
@ -810,7 +852,7 @@ mod tests {
|
|||
#[test]
|
||||
fn dont_flag_low_hanging_fruit_desc() {
|
||||
assert_no_lints(
|
||||
"Corrects non-standard variants of low-hanging fruit.",
|
||||
"Corrects nonstandard variants of low-hanging fruit.",
|
||||
test_group(),
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,9 +19,10 @@ where
|
|||
{
|
||||
pub fn new(dict: D) -> Self {
|
||||
let oov = SequenceExpr::default().then_oov();
|
||||
let looks_plural = SequenceExpr::default().then(|tok: &Token, _src: &[char]| {
|
||||
let lchars = tok.span.get_content(_src).to_lower();
|
||||
lchars.last().is_some_and(|c| *c == 's')
|
||||
let looks_plural = SequenceExpr::default().then(|tok: &Token, src: &[char]| {
|
||||
tok.span
|
||||
.get_content(src)
|
||||
.ends_with_ignore_ascii_case_chars(&['s'])
|
||||
});
|
||||
let oov_looks_plural = All::new(vec![Box::new(oov), Box::new(looks_plural)]);
|
||||
|
||||
86
harper-core/src/linting/mass_nouns/mod.rs
Normal file
86
harper-core/src/linting/mass_nouns/mod.rs
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
mod mass_plurals;
|
||||
mod noun_countability;
|
||||
|
||||
use mass_plurals::MassPlurals;
|
||||
use noun_countability::NounCountability;
|
||||
|
||||
use crate::{
|
||||
Document,
|
||||
linting::{Lint, Linter},
|
||||
remove_overlaps,
|
||||
spell::Dictionary,
|
||||
};
|
||||
|
||||
pub struct MassNouns<D> {
|
||||
mass_plurals: MassPlurals<D>,
|
||||
noun_countability: NounCountability,
|
||||
}
|
||||
|
||||
impl<D> MassNouns<D>
|
||||
where
|
||||
D: Dictionary + Clone,
|
||||
{
|
||||
pub fn new(dict: D) -> Self {
|
||||
Self {
|
||||
mass_plurals: MassPlurals::new(dict.clone()),
|
||||
noun_countability: NounCountability::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<D> Linter for MassNouns<D>
|
||||
where
|
||||
D: Dictionary,
|
||||
{
|
||||
fn lint(&mut self, document: &Document) -> Vec<Lint> {
|
||||
let mut lints = Vec::new();
|
||||
|
||||
lints.extend(self.mass_plurals.lint(document));
|
||||
lints.extend(self.noun_countability.lint(document));
|
||||
|
||||
remove_overlaps(&mut lints);
|
||||
|
||||
lints
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Detects mass nouns used as countable nouns."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::{
|
||||
linting::tests::{assert_lint_count, assert_suggestion_result},
|
||||
spell::FstDictionary,
|
||||
};
|
||||
|
||||
use super::MassNouns;
|
||||
|
||||
#[test]
|
||||
fn flag_advices_and_an_advice() {
|
||||
assert_lint_count(
|
||||
"I asked for an advice and he gave me two advices!",
|
||||
MassNouns::new(FstDictionary::curated()),
|
||||
2,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_a_luggage() {
|
||||
assert_suggestion_result(
|
||||
"I managed to pack all my clothing into one luggage.",
|
||||
MassNouns::new(FstDictionary::curated()),
|
||||
"I managed to pack all my clothing into one suitcase.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_clothings() {
|
||||
assert_suggestion_result(
|
||||
"I managed to pack all my clothings into one suitcase.",
|
||||
MassNouns::new(FstDictionary::curated()),
|
||||
"I managed to pack all my clothing into one suitcase.",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -203,7 +203,7 @@ impl Default for MissingTo {
|
|||
let pattern = SequenceExpr::default()
|
||||
.then(Self::controller_words())
|
||||
.t_ws()
|
||||
.then(|tok: &Token, _source: &[char]| tok.kind.is_verb_lemma());
|
||||
.then_kind_where(|kind| kind.is_verb_lemma());
|
||||
|
||||
map.insert(pattern, 0);
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ mod and_in;
|
|||
mod and_the_like;
|
||||
mod another_thing_coming;
|
||||
mod another_think_coming;
|
||||
mod apart_from;
|
||||
mod ask_no_preposition;
|
||||
mod avoid_curses;
|
||||
mod back_in_the_day;
|
||||
|
|
@ -24,6 +25,7 @@ mod be_allowed;
|
|||
mod best_of_all_time;
|
||||
mod boring_words;
|
||||
mod bought;
|
||||
mod brand_brandish;
|
||||
mod call_them;
|
||||
mod cant;
|
||||
mod capitalize_personal_pronouns;
|
||||
|
|
@ -37,22 +39,26 @@ mod compound_subject_i;
|
|||
mod confident;
|
||||
mod correct_number_suffix;
|
||||
mod criteria_phenomena;
|
||||
mod cure_for;
|
||||
mod currency_placement;
|
||||
mod dashes;
|
||||
mod despite_of;
|
||||
mod determiner_without_noun;
|
||||
mod didnt;
|
||||
mod discourse_markers;
|
||||
mod disjoint_prefixes;
|
||||
mod dot_initialisms;
|
||||
mod double_click;
|
||||
mod double_modal;
|
||||
mod ellipsis_length;
|
||||
mod else_possessive;
|
||||
mod ever_every;
|
||||
mod everyday;
|
||||
mod expand_memory_shorthands;
|
||||
mod expand_time_shorthands;
|
||||
mod expr_linter;
|
||||
mod far_be_it;
|
||||
mod fascinated_by;
|
||||
mod feel_fell;
|
||||
mod few_units_of_time_ago;
|
||||
mod filler_words;
|
||||
|
|
@ -62,6 +68,7 @@ mod for_noun;
|
|||
mod free_predicate;
|
||||
mod friend_of_me;
|
||||
mod go_so_far_as_to;
|
||||
mod handful;
|
||||
mod have_pronoun;
|
||||
mod have_take_a_look;
|
||||
mod hedging;
|
||||
|
|
@ -83,6 +90,8 @@ mod it_looks_like_that;
|
|||
mod it_would_be;
|
||||
mod its_contraction;
|
||||
mod its_possessive;
|
||||
mod jealous_of;
|
||||
mod johns_hopkins;
|
||||
mod left_right_hand;
|
||||
mod less_worse;
|
||||
mod let_to_do;
|
||||
|
|
@ -95,7 +104,7 @@ mod long_sentences;
|
|||
mod looking_forward_to;
|
||||
mod map_phrase_linter;
|
||||
mod map_phrase_set_linter;
|
||||
mod mass_plurals;
|
||||
mod mass_nouns;
|
||||
mod merge_linters;
|
||||
mod merge_words;
|
||||
mod missing_preposition;
|
||||
|
|
@ -103,6 +112,7 @@ mod missing_space;
|
|||
mod missing_to;
|
||||
mod misspell;
|
||||
mod mixed_bag;
|
||||
mod modal_be_adjective;
|
||||
mod modal_of;
|
||||
mod modal_seem;
|
||||
mod months;
|
||||
|
|
@ -117,10 +127,10 @@ mod no_match_for;
|
|||
mod no_oxford_comma;
|
||||
mod nobody;
|
||||
mod nominal_wants;
|
||||
mod noun_countability;
|
||||
mod noun_verb_confusion;
|
||||
mod number_suffix_capitalization;
|
||||
mod of_course;
|
||||
mod oldest_in_the_book;
|
||||
mod on_floor;
|
||||
mod once_or_twice;
|
||||
mod one_and_the_same;
|
||||
|
|
@ -147,9 +157,12 @@ mod quantifier_needs_of;
|
|||
mod quantifier_numeral_conflict;
|
||||
mod quite_quiet;
|
||||
mod quote_spacing;
|
||||
mod redundant_acronyms;
|
||||
mod redundant_additive_adverbs;
|
||||
mod regionalisms;
|
||||
mod repeated_words;
|
||||
mod respond;
|
||||
mod right_click;
|
||||
mod roller_skated;
|
||||
mod safe_to_save;
|
||||
mod save_to_safe;
|
||||
|
|
@ -162,6 +175,7 @@ mod single_be;
|
|||
mod some_without_article;
|
||||
mod something_is;
|
||||
mod somewhat_something;
|
||||
mod soon_to_be;
|
||||
mod sought_after;
|
||||
mod spaces;
|
||||
mod spell_check;
|
||||
|
|
@ -169,6 +183,7 @@ mod spelled_numbers;
|
|||
mod split_words;
|
||||
mod subject_pronoun;
|
||||
mod suggestion;
|
||||
mod take_medicine;
|
||||
mod take_serious;
|
||||
mod that_than;
|
||||
mod that_which;
|
||||
|
|
@ -184,9 +199,11 @@ mod throw_rubbish;
|
|||
mod to_adverb;
|
||||
mod to_two_too;
|
||||
mod touristic;
|
||||
mod transposed_space;
|
||||
mod unclosed_quotes;
|
||||
mod update_place_names;
|
||||
mod use_genitive;
|
||||
mod use_title_case;
|
||||
mod verb_to_adjective;
|
||||
mod very_unique;
|
||||
mod vice_versa;
|
||||
|
|
@ -196,6 +213,7 @@ mod well_educated;
|
|||
mod whereas;
|
||||
mod widely_accepted;
|
||||
mod win_prize;
|
||||
mod wish_could;
|
||||
mod wordpress_dotcom;
|
||||
mod would_never_have;
|
||||
|
||||
|
|
@ -242,7 +260,8 @@ where
|
|||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use crate::{Document, Span, Token, parsers::PlainEnglish};
|
||||
use crate::parsers::Markdown;
|
||||
use crate::{Document, Span, Token};
|
||||
use hashbrown::HashSet;
|
||||
|
||||
/// Extension trait for converting spans of tokens back to their original text
|
||||
|
|
@ -317,7 +336,7 @@ pub mod tests {
|
|||
let transformed_str = transform_nth_str(text, &mut linter, n);
|
||||
|
||||
if transformed_str.as_str() != expected_result {
|
||||
panic!("Expected \"{expected_result}\"\n But got \"{transformed_str}\"");
|
||||
panic!("Expected \"{expected_result}\"\n But got \"{transformed_str}\"");
|
||||
}
|
||||
|
||||
// Applying the suggestions should fix all the lints.
|
||||
|
|
@ -459,6 +478,23 @@ pub mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// Asserts that the lint's message matches the expected message.
|
||||
#[track_caller]
|
||||
pub fn assert_lint_message(text: &str, mut linter: impl Linter, expected_message: &str) {
|
||||
let test = Document::new_markdown_default_curated(text);
|
||||
let lints = linter.lint(&test);
|
||||
|
||||
// Just check the first lint for now
|
||||
if let Some(lint) = lints.first() {
|
||||
if lint.message != expected_message {
|
||||
panic!(
|
||||
"Expected lint message \"{expected_message}\", but got \"{}\"",
|
||||
lint.message
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn transform_nth_str(text: &str, linter: &mut impl Linter, n: usize) -> String {
|
||||
let mut text_chars: Vec<char> = text.chars().collect();
|
||||
|
||||
|
|
@ -467,7 +503,7 @@ pub mod tests {
|
|||
loop {
|
||||
let test = Document::new_from_vec(
|
||||
text_chars.clone().into(),
|
||||
&PlainEnglish,
|
||||
&Markdown::default(),
|
||||
&FstDictionary::curated(),
|
||||
);
|
||||
let lints = linter.lint(&test);
|
||||
|
|
|
|||
171
harper-core/src/linting/modal_be_adjective.rs
Normal file
171
harper-core/src/linting/modal_be_adjective.rs
Normal file
|
|
@ -0,0 +1,171 @@
|
|||
use crate::{
|
||||
CharStringExt, Lint, Token, TokenKind,
|
||||
expr::{Expr, SequenceExpr},
|
||||
linting::{ExprLinter, Suggestion, expr_linter::Chunk},
|
||||
patterns::ModalVerb,
|
||||
};
|
||||
|
||||
pub struct ModalBeAdjective {
|
||||
expr: Box<dyn Expr>,
|
||||
}
|
||||
|
||||
impl Default for ModalBeAdjective {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
expr: Box::new(
|
||||
SequenceExpr::default()
|
||||
.then(ModalVerb::default())
|
||||
.t_ws()
|
||||
.then_kind_is_but_isnt_any_of_except(
|
||||
TokenKind::is_adjective,
|
||||
&[
|
||||
TokenKind::is_verb_lemma, // set
|
||||
TokenKind::is_adverb, // ever
|
||||
TokenKind::is_preposition, // on
|
||||
TokenKind::is_determiner, // all
|
||||
TokenKind::is_pronoun, // all
|
||||
] as &[_],
|
||||
&[
|
||||
"backup", // adjective commonly misused as a verb
|
||||
"likely", // adjective but with special usage
|
||||
] as &[_],
|
||||
),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprLinter for ModalBeAdjective {
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint_with_context(
|
||||
&self,
|
||||
toks: &[Token],
|
||||
src: &[char],
|
||||
ctx: Option<(&[Token], &[Token])>,
|
||||
) -> Option<Lint> {
|
||||
if let Some((_, after)) = ctx
|
||||
&& after.len() >= 2
|
||||
&& after[0].kind.is_whitespace()
|
||||
{
|
||||
// If the 'after' context is whitespace followed by a noun, there is no error
|
||||
// (Not including these marginal nouns: "at", "by", "if")
|
||||
if after[1].kind.is_noun()
|
||||
&& !after[1]
|
||||
.span
|
||||
.get_content(src)
|
||||
.eq_any_ignore_ascii_case_str(&["at", "by", "if"])
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
// If the adjective plus the next word is "kind of"
|
||||
if toks
|
||||
.last()
|
||||
.unwrap()
|
||||
.span
|
||||
.get_content_string(src)
|
||||
.eq_ignore_ascii_case("kind")
|
||||
&& after[1]
|
||||
.span
|
||||
.get_content(src)
|
||||
.eq_ignore_ascii_case_str("of")
|
||||
{
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
Some(Lint {
|
||||
span: toks[0].span,
|
||||
suggestions: vec![Suggestion::InsertAfter(" be".chars().collect())],
|
||||
message: "You may be missing the word `be` between this modal verb and adjective."
|
||||
.to_string(),
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Looks for `be` missing between a modal verb and adjective."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::linting::tests::{assert_no_lints, assert_suggestion_result};
|
||||
|
||||
use super::ModalBeAdjective;
|
||||
|
||||
#[test]
|
||||
fn fix_would_nice() {
|
||||
assert_suggestion_result(
|
||||
"It would nice if Harper could detect this.",
|
||||
ModalBeAdjective::default(),
|
||||
"It would be nice if Harper could detect this.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_could_configured() {
|
||||
assert_suggestion_result(
|
||||
"It could configured by parameters and the commands above effectively disable it.",
|
||||
ModalBeAdjective::default(),
|
||||
"It could be configured by parameters and the commands above effectively disable it.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_will_accessible() {
|
||||
assert_suggestion_result(
|
||||
"Your WordPress site will accessible at http://localhost",
|
||||
ModalBeAdjective::default(),
|
||||
"Your WordPress site will be accessible at http://localhost",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignore_would_external_traffic() {
|
||||
assert_no_lints(
|
||||
"And why would external traffic be trying to access my server if I don't know who or what it is?",
|
||||
ModalBeAdjective::default(),
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignore_could_kind_of() {
|
||||
assert_no_lints("you could kind of see the ...", ModalBeAdjective::default())
|
||||
}
|
||||
|
||||
// Known false positives. You may want to improve the code to handle some of these.
|
||||
|
||||
#[test]
|
||||
#[ignore = "false positive: 'backup' is an adjective but also a spello for the verb 'back up'"]
|
||||
fn ignore_you_can_backup() {
|
||||
assert_no_lints("You can backup Userdata.", ModalBeAdjective::default());
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore = "false positive: 'incorrect' should be 'incorrectly'."]
|
||||
fn ignore_would_incorrect() {
|
||||
assert_no_lints(
|
||||
"Bug in versions 4.0 and 4.1 would incorrect list the address module",
|
||||
ModalBeAdjective::default(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore = "false positive: 'upper-bound' is an ad-hoc verb here."]
|
||||
fn ignore_should_upper() {
|
||||
assert_no_lints(
|
||||
"we should upper-bound it to the next MAJOR version.",
|
||||
ModalBeAdjective::default(),
|
||||
);
|
||||
assert_no_lints(
|
||||
"some older software (filezilla on debian-stable) cannot passive-mode with TLS",
|
||||
ModalBeAdjective::default(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -29,15 +29,15 @@ impl ModalSeem {
|
|||
fn adjective_step() -> SequenceExpr {
|
||||
SequenceExpr::default()
|
||||
.t_ws()
|
||||
.then(|tok: &Token, _source: &[char]| tok.kind.is_adjective())
|
||||
.then_kind_where(|kind| kind.is_adjective())
|
||||
}
|
||||
|
||||
fn adverb_then_adjective_step() -> SequenceExpr {
|
||||
SequenceExpr::default()
|
||||
.t_ws()
|
||||
.then(|tok: &Token, _source: &[char]| tok.kind.is_adverb())
|
||||
.then_kind_where(|kind| kind.is_adverb())
|
||||
.t_ws()
|
||||
.then(|tok: &Token, _source: &[char]| tok.kind.is_adjective())
|
||||
.then_kind_where(|kind| kind.is_adjective())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -54,8 +54,8 @@ impl Default for Months {
|
|||
"by", "during", "in", "last", "next", "of", "until",
|
||||
]);
|
||||
|
||||
let year_or_day_of_month = SequenceExpr::default().then(|tok: &Token, _src: &[char]| {
|
||||
if let TokenKind::Number(number) = &tok.kind {
|
||||
let year_or_day_of_month = SequenceExpr::default().then_kind_where(|kind| {
|
||||
if let TokenKind::Number(number) = &kind {
|
||||
let v = number.value.into_inner() as u32;
|
||||
(1500..=2500).contains(&v) || (1..=31).contains(&v)
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -23,8 +23,7 @@ impl Default for MostNumber {
|
|||
),
|
||||
// Context pattern
|
||||
Box::new(
|
||||
SequenceExpr::default()
|
||||
.then_anything()
|
||||
SequenceExpr::anything()
|
||||
.then_anything()
|
||||
.then_anything()
|
||||
.then_anything()
|
||||
|
|
|
|||
|
|
@ -18,24 +18,28 @@ pub struct NeedToNoun {
|
|||
impl Default for NeedToNoun {
|
||||
fn default() -> Self {
|
||||
let postfix_exceptions = LongestMatchOf::new(vec![
|
||||
Box::new(|tok: &Token, _: &[char]| tok.kind.is_adverb() || tok.kind.is_determiner()),
|
||||
Box::new(WordSet::new(&["about"])),
|
||||
Box::new(|tok: &Token, _: &[char]| {
|
||||
tok.kind.is_adverb()
|
||||
|| tok.kind.is_determiner()
|
||||
|| tok.kind.is_unlintable()
|
||||
|| tok.kind.is_pronoun()
|
||||
}),
|
||||
Box::new(WordSet::new(&["about", "it"])),
|
||||
]);
|
||||
|
||||
let exceptions = SequenceExpr::default()
|
||||
.t_any()
|
||||
let exceptions = SequenceExpr::anything()
|
||||
.t_any()
|
||||
.t_any()
|
||||
.t_any()
|
||||
.then_word_set(&["be"]);
|
||||
|
||||
let a = SequenceExpr::default()
|
||||
.then(|tok: &Token, _: &[char]| tok.kind.is_nominal())
|
||||
.then_kind_where(|kind| kind.is_nominal())
|
||||
.t_ws()
|
||||
.then_unless(postfix_exceptions);
|
||||
|
||||
let b = SequenceExpr::default()
|
||||
.then(|tok: &Token, _: &[char]| tok.kind.is_nominal() && !tok.kind.is_verb());
|
||||
let b =
|
||||
SequenceExpr::default().then_kind_where(|kind| kind.is_nominal() && !kind.is_verb());
|
||||
|
||||
let expr = SequenceExpr::default()
|
||||
.then(DerivedFrom::new_from_str("need"))
|
||||
|
|
@ -421,4 +425,28 @@ mod tests {
|
|||
fn allows_issue_2252() {
|
||||
assert_no_lints("Things I need to do today:", NeedToNoun::default());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allows_install() {
|
||||
assert_no_lints(
|
||||
"You need to install it separately, as it's a standalone application.",
|
||||
NeedToNoun::default(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allows_lay() {
|
||||
assert_no_lints(
|
||||
"Okay, this is a long one, but I feel like I need to lay everything out.",
|
||||
NeedToNoun::default(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allows_overcome() {
|
||||
assert_no_lints(
|
||||
"We believe every family deserves the opportunity to flourish, and we are committed to providing the resources they need to overcome adversity.",
|
||||
NeedToNoun::default(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ impl Default for AffectToEffect {
|
|||
.then(|tok: &Token, source: &[char]| matches_preceding_context(tok, source))
|
||||
.t_ws()
|
||||
.then(|tok: &Token, source: &[char]| is_affect_word(tok, source))
|
||||
.then(|tok: &Token, _source: &[char]| matches!(tok.kind, TokenKind::Punctuation(_)));
|
||||
.then_kind_where(|kind| kind.is_punctuation());
|
||||
|
||||
map.insert(punctuation_follow, 2);
|
||||
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ pub(crate) const NOUN_VERB_PAIRS: &[(&str, &str)] = &[
|
|||
("emphasis", "emphasize"), // TODO how to handle "emphasise" as well as "emphasize"?
|
||||
("intent", "intend"),
|
||||
// ("proof", "prove"), // "Proof" is also a verb, a synonym of "proofread".
|
||||
("weight", "weigh"),
|
||||
// Add more pairs here as needed
|
||||
];
|
||||
|
||||
|
|
@ -1361,4 +1362,40 @@ mod tests {
|
|||
NounVerbConfusion::default(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn issue_2041() {
|
||||
assert_suggestion_result(
|
||||
"Let me give you a piece of advise.",
|
||||
NounVerbConfusion::default(),
|
||||
"Let me give you a piece of advice.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_helps_you_weight() {
|
||||
assert_suggestion_result(
|
||||
"An iOS app that helps you weight small things on the screen of your iPhone / iPad.",
|
||||
NounVerbConfusion::default(),
|
||||
"An iOS app that helps you weigh small things on the screen of your iPhone / iPad.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_do_you_weight() {
|
||||
assert_suggestion_result(
|
||||
"How much do you weight?",
|
||||
NounVerbConfusion::default(),
|
||||
"How much do you weigh?",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_more_than_you_weight() {
|
||||
assert_suggestion_result(
|
||||
"contributed more than you weight",
|
||||
NounVerbConfusion::default(),
|
||||
"contributed more than you weigh",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,43 +1,15 @@
|
|||
use crate::expr::{Expr, FirstMatchOf, LongestMatchOf, SequenceExpr};
|
||||
use crate::linting::expr_linter::Chunk;
|
||||
use crate::linting::{ExprLinter, Lint, LintKind, Suggestion};
|
||||
use crate::patterns::Word;
|
||||
use crate::{CharStringExt, Lrc, Token, patterns::WordSet};
|
||||
use crate::{
|
||||
CharStringExt, Lrc, Token,
|
||||
expr::{Expr, FirstMatchOf, LongestMatchOf, SequenceExpr},
|
||||
linting::{ExprLinter, Lint, LintKind, Suggestion, expr_linter::Chunk},
|
||||
patterns::{ModalVerb, Word, WordSet},
|
||||
};
|
||||
|
||||
use super::super::NOUN_VERB_PAIRS;
|
||||
|
||||
/// Pronouns that can come before verbs but not nouns
|
||||
const PRONOUNS: &[&str] = &["he", "I", "it", "she", "they", "we", "who", "you"];
|
||||
|
||||
/// Adverbs that can come before verbs but not nouns
|
||||
/// Note: "Sometimes" can come before a noun.
|
||||
const ADVERBS: &[&str] = &["always", "never", "often", "seldom"];
|
||||
|
||||
/// Modal verbs that can come before other verbs but not nouns
|
||||
const MODAL_VERBS_ETC: &[&str] = &[
|
||||
"can",
|
||||
"cannot",
|
||||
"can't",
|
||||
"could",
|
||||
"couldn't",
|
||||
"may",
|
||||
"might",
|
||||
"mightn't",
|
||||
"must",
|
||||
"mustn't",
|
||||
"shall",
|
||||
"shan't",
|
||||
"should",
|
||||
"shouldn't",
|
||||
"will",
|
||||
"won't",
|
||||
"would",
|
||||
"wouldn't",
|
||||
// not modals per se, but modal-like
|
||||
"do",
|
||||
"don't",
|
||||
];
|
||||
|
||||
/// Linter that corrects common noun/verb confusions
|
||||
pub(super) struct GeneralNounInsteadOfVerb {
|
||||
expr: Box<dyn Expr>,
|
||||
|
|
@ -45,10 +17,21 @@ pub(super) struct GeneralNounInsteadOfVerb {
|
|||
|
||||
impl Default for GeneralNounInsteadOfVerb {
|
||||
fn default() -> Self {
|
||||
// Adverbs that can come before verbs but not nouns
|
||||
// Note: "Sometimes" can come before a noun.
|
||||
let adverb_of_frequency = |tok: &Token, src: &[char]| {
|
||||
tok.kind.is_frequency_adverb()
|
||||
&& !tok
|
||||
.span
|
||||
.get_content(src)
|
||||
.eq_ignore_ascii_case_str("sometimes")
|
||||
};
|
||||
|
||||
let pre_context = FirstMatchOf::new(vec![
|
||||
Box::new(WordSet::new(PRONOUNS)),
|
||||
Box::new(WordSet::new(MODAL_VERBS_ETC)),
|
||||
Box::new(WordSet::new(ADVERBS)),
|
||||
Box::new(ModalVerb::with_common_errors()),
|
||||
Box::new(WordSet::new(&["do", "don't", "dont"])),
|
||||
Box::new(adverb_of_frequency),
|
||||
Box::new(Word::new("to")),
|
||||
]);
|
||||
|
||||
|
|
@ -98,13 +81,15 @@ impl ExprLinter for GeneralNounInsteadOfVerb {
|
|||
// If we have the next word token, try to rule out compound nouns
|
||||
if toks.len() > 4 {
|
||||
let following_tok = &toks[4];
|
||||
if following_tok.kind.is_noun() && !following_tok.kind.is_preposition() {
|
||||
if following_tok.kind.is_noun()
|
||||
&& !following_tok.kind.is_proper_noun()
|
||||
&& !following_tok.kind.is_preposition()
|
||||
{
|
||||
// But first rule out marginal "nouns"
|
||||
let following_lower = following_tok.span.get_content_string(src).to_lowercase();
|
||||
if following_lower != "it"
|
||||
&& following_lower != "me"
|
||||
&& following_lower != "on"
|
||||
&& following_lower != "that"
|
||||
if !following_tok
|
||||
.span
|
||||
.get_content(src)
|
||||
.eq_any_ignore_ascii_case_str(&["it", "me", "on", "that"])
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ impl Default for VerbInsteadOfNoun {
|
|||
Self {
|
||||
expr: Box::new(
|
||||
SequenceExpr::default()
|
||||
.then(UPOSSet::new(&[UPOS::ADJ]))
|
||||
.then(UPOSSet::new(&[UPOS::ADJ, UPOS::ADP]))
|
||||
.then_whitespace()
|
||||
.then(verbs.clone()),
|
||||
),
|
||||
|
|
|
|||
179
harper-core/src/linting/oldest_in_the_book.rs
Normal file
179
harper-core/src/linting/oldest_in_the_book.rs
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
use crate::{
|
||||
CharStringExt, Lint, Token,
|
||||
expr::{Expr, Repeating, SequenceExpr},
|
||||
linting::{ExprLinter, LintKind, Suggestion, expr_linter::Chunk},
|
||||
};
|
||||
|
||||
pub struct OldestInTheBook {
|
||||
expr: Box<dyn Expr>,
|
||||
}
|
||||
|
||||
impl Default for OldestInTheBook {
|
||||
fn default() -> Self {
|
||||
let adj = |t: &Token, s: &[char]| {
|
||||
let k = &t.kind;
|
||||
(k.is_np_member() || k.is_adjective())
|
||||
&& !k.is_noun()
|
||||
&& !t
|
||||
.span
|
||||
.get_content(s)
|
||||
.eq_ignore_ascii_case_chars(&['i', 'n'])
|
||||
};
|
||||
|
||||
// Zero or more adjectives
|
||||
let adjseq = Repeating::new(Box::new(SequenceExpr::default().then(adj).t_ws()), 0);
|
||||
|
||||
let noun = |t: &Token, s: &[char]| {
|
||||
let k = &t.kind;
|
||||
(k.is_np_member() || k.is_noun() || k.is_oov())
|
||||
&& !t
|
||||
.span
|
||||
.get_content(s)
|
||||
.eq_ignore_ascii_case_chars(&['i', 'n'])
|
||||
};
|
||||
|
||||
// One or more nouns
|
||||
let nounseq = SequenceExpr::default()
|
||||
.then(noun)
|
||||
.then_optional(Repeating::new(
|
||||
Box::new(SequenceExpr::default().t_ws().then(noun)),
|
||||
1,
|
||||
));
|
||||
|
||||
let noun_phrase = SequenceExpr::default().then_optional(adjseq).then(nounseq);
|
||||
|
||||
Self {
|
||||
expr: Box::new(
|
||||
SequenceExpr::fixed_phrase("oldest ")
|
||||
.then(noun_phrase)
|
||||
.then_fixed_phrase(" in the books"),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprLinter for OldestInTheBook {
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint_with_context(
|
||||
&self,
|
||||
toks: &[Token],
|
||||
src: &[char],
|
||||
_ctx: Option<(&[Token], &[Token])>,
|
||||
) -> Option<Lint> {
|
||||
let np = &toks[2..toks.len() - 4];
|
||||
let tricky = np.iter().any(|n| {
|
||||
n.span
|
||||
.get_content(src)
|
||||
.eq_any_ignore_ascii_case_str(&["trick", "tricks"])
|
||||
});
|
||||
|
||||
let message = if tricky {
|
||||
"This idiom should use singular `book` instead of plural `books`."
|
||||
} else {
|
||||
"If this is a play on the idiom `oldest trick in the book`, it should use singular `book` instead of plural `books`."
|
||||
}
|
||||
.to_string();
|
||||
|
||||
Some(Lint {
|
||||
span: toks.last()?.span,
|
||||
lint_kind: LintKind::Usage,
|
||||
suggestions: vec![Suggestion::replace_with_match_case_str(
|
||||
"book",
|
||||
toks.last()?.span.get_content(src),
|
||||
)],
|
||||
message,
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Detects the idiom `oldest X in the books`, which should use singular `book`."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::OldestInTheBook;
|
||||
use crate::linting::tests::{assert_lint_message, assert_suggestion_result};
|
||||
|
||||
// Probable references to the idiom "oldest trick in the book"
|
||||
|
||||
#[test]
|
||||
fn fix_delphi_mistake() {
|
||||
assert_suggestion_result(
|
||||
"This is the oldest Delphi mistake in the books and I'm sure you've made it before (we all have), and I'm sure you recognise it when you see it.",
|
||||
OldestInTheBook::default(),
|
||||
"This is the oldest Delphi mistake in the book and I'm sure you've made it before (we all have), and I'm sure you recognise it when you see it.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_trick() {
|
||||
assert_suggestion_result(
|
||||
"... oldest trick in the books, a restart and it works all the times(for now).",
|
||||
OldestInTheBook::default(),
|
||||
"... oldest trick in the book, a restart and it works all the times(for now).",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_virus_trick() {
|
||||
assert_suggestion_result(
|
||||
"Once the OS is started the MBR is typically protected for virus reasons - this is one of the oldest virus tricks in the books - goes back to ...",
|
||||
OldestInTheBook::default(),
|
||||
"Once the OS is started the MBR is typically protected for virus reasons - this is one of the oldest virus tricks in the book - goes back to ...",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_mistake() {
|
||||
assert_suggestion_result(
|
||||
"Ok, I realized now that I was making the oldest mistake in the books with my code, dividing my v by 2 instead of dividing it by 5.",
|
||||
OldestInTheBook::default(),
|
||||
"Ok, I realized now that I was making the oldest mistake in the book with my code, dividing my v by 2 instead of dividing it by 5.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_tricks() {
|
||||
assert_suggestion_result(
|
||||
"He enables the oldest tricks in the books, create fear from thing like prosperity (we really don't need Foxconn?)",
|
||||
OldestInTheBook::default(),
|
||||
"He enables the oldest tricks in the book, create fear from thing like prosperity (we really don't need Foxconn?)",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_military_plays() {
|
||||
assert_suggestion_result(
|
||||
"Isnt that like one of the oldest military plays in the books?",
|
||||
OldestInTheBook::default(),
|
||||
"Isnt that like one of the oldest military plays in the book?",
|
||||
);
|
||||
}
|
||||
|
||||
// Test messages
|
||||
|
||||
#[test]
|
||||
fn is_oldest_trick_in_the_books_ref_to_idom() {
|
||||
assert_lint_message(
|
||||
"This is one of the oldest trick in the books",
|
||||
OldestInTheBook::default(),
|
||||
"This idiom should use singular `book` instead of plural `books`.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn is_chromatic_alterations_ref_to_idom() {
|
||||
assert_lint_message(
|
||||
"One of the oldest chromatic alterations in the books is the raising of the leading tone",
|
||||
OldestInTheBook::default(),
|
||||
"If this is a play on the idiom `oldest trick in the book`, it should use singular `book` instead of plural `books`.",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -40,16 +40,13 @@ impl Default for OpenCompounds {
|
|||
}
|
||||
let compound = Lrc::new(SequenceExpr::default().then(compound_wordset));
|
||||
|
||||
let with_prev = SequenceExpr::default()
|
||||
.then_anything()
|
||||
.then(compound.clone());
|
||||
let with_prev = SequenceExpr::anything().then(compound.clone());
|
||||
|
||||
let with_next = SequenceExpr::default()
|
||||
.then(compound.clone())
|
||||
.then_anything();
|
||||
|
||||
let with_prev_and_next = SequenceExpr::default()
|
||||
.then_anything()
|
||||
let with_prev_and_next = SequenceExpr::anything()
|
||||
.then(compound.clone())
|
||||
.then_anything();
|
||||
|
||||
|
|
|
|||
|
|
@ -39,7 +39,26 @@ impl ExprLinter for OrthographicConsistency {
|
|||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
|
||||
fn match_to_lint_with_context(
|
||||
&self,
|
||||
matched_tokens: &[Token],
|
||||
source: &[char],
|
||||
context: Option<(&[Token], &[Token])>,
|
||||
) -> Option<Lint> {
|
||||
if let Some((pre, post)) = context {
|
||||
if let Some(pre_tok) = pre.last()
|
||||
&& pre_tok.kind.is_hyphen()
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
if let Some(post_tok) = post.first()
|
||||
&& post_tok.kind.is_hyphen()
|
||||
{
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
let word = &matched_tokens[0];
|
||||
|
||||
let Some(Some(metadata)) = word.kind.as_word() else {
|
||||
|
|
@ -52,6 +71,7 @@ impl ExprLinter for OrthographicConsistency {
|
|||
|
||||
if metadata.is_allcaps()
|
||||
&& !metadata.is_lowercase()
|
||||
&& !metadata.is_upper_camel()
|
||||
&& !cur_flags.contains(OrthFlags::ALLCAPS)
|
||||
{
|
||||
return Some(Lint {
|
||||
|
|
@ -74,9 +94,12 @@ impl ExprLinter for OrthographicConsistency {
|
|||
];
|
||||
|
||||
if flags_to_check
|
||||
.iter()
|
||||
.any(|flag| canonical_flags.contains(*flag) != cur_flags.contains(*flag))
|
||||
.into_iter()
|
||||
.filter(|flag| canonical_flags.contains(*flag) != cur_flags.contains(*flag))
|
||||
.count()
|
||||
== 1
|
||||
&& let Some(canonical) = self.dict.get_correct_capitalization_of(chars)
|
||||
&& canonical != chars
|
||||
{
|
||||
return Some(Lint {
|
||||
span: word.span,
|
||||
|
|
@ -100,7 +123,7 @@ impl ExprLinter for OrthographicConsistency {
|
|||
lint_kind: LintKind::Capitalization,
|
||||
suggestions: vec![Suggestion::ReplaceWith(canonical.to_vec())],
|
||||
message: format!(
|
||||
"The canonical dictionary spelling is `{}`.",
|
||||
"The canonical dictionary spelling is title case: `{}`.",
|
||||
canonical.iter().collect::<String>()
|
||||
),
|
||||
priority: 127,
|
||||
|
|
@ -356,4 +379,12 @@ mod tests {
|
|||
assert_no_lints(sentence, OrthographicConsistency::default());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allows_news() {
|
||||
assert_no_lints(
|
||||
"This is the best part of the news broadcast.",
|
||||
OrthographicConsistency::default(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -212,6 +212,7 @@ impl Linter for PhrasalVerbAsCompoundNoun {
|
|||
"architecture",
|
||||
"classes",
|
||||
"development",
|
||||
"developer",
|
||||
"docs",
|
||||
"ecosystem",
|
||||
"files",
|
||||
|
|
@ -757,4 +758,12 @@ mod tests {
|
|||
PhrasalVerbAsCompoundNoun::default(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn issue_2369() {
|
||||
assert_no_lints(
|
||||
"## Plugin developer documentation",
|
||||
PhrasalVerbAsCompoundNoun::default(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -44,6 +44,13 @@ pub fn lint_group() -> LintGroup {
|
|||
"Corrects `a couple of more` to `a couple more`.",
|
||||
LintKind::Redundancy
|
||||
),
|
||||
"AdNauseam" => (
|
||||
["as nauseam"],
|
||||
["ad nauseam"],
|
||||
"This phrase comes from Latin, where `ad` means `to`.",
|
||||
"Corrects `as nauseam` to `ad nauseam`.",
|
||||
LintKind::Spelling
|
||||
),
|
||||
"AfterAWhile" => (
|
||||
["after while"],
|
||||
["after a while"],
|
||||
|
|
@ -249,7 +256,7 @@ pub fn lint_group() -> LintGroup {
|
|||
["in built", "in-built", "built in"],
|
||||
["built-in"],
|
||||
"Prefer the hyphenated compound `built-in`.",
|
||||
"English convention treats `built-in` as a single, attributive adjective—meaning something integrated from the outset—whereas other forms like `in built` are non-standard and can feel awkward to readers."
|
||||
"English convention treats `built-in` as a single, attributive adjective—meaning something integrated from the outset—whereas other forms like `in built` are nonstandard and can feel awkward to readers."
|
||||
),
|
||||
"ByAccident" => (
|
||||
["on accident"],
|
||||
|
|
@ -384,6 +391,13 @@ pub fn lint_group() -> LintGroup {
|
|||
"Corrects `every since` to `ever since`.",
|
||||
LintKind::Typo
|
||||
),
|
||||
"EveryOnceAndAgain" => (
|
||||
["every once and again"],
|
||||
["every once in a while", "once again"],
|
||||
"For things that happen only occasionaly use `every once in a while. For things that persistently happen use `once again`.",
|
||||
"Corrects `every once and again` to `every once in a while` or `once again`.",
|
||||
LintKind::Usage
|
||||
),
|
||||
"EveryTime" => (
|
||||
["everytime"],
|
||||
["every time"],
|
||||
|
|
@ -426,13 +440,6 @@ pub fn lint_group() -> LintGroup {
|
|||
"Expands the abbreviation `w/o` to the full word `without` for clarity.",
|
||||
LintKind::Style
|
||||
),
|
||||
"Expatriate" => (
|
||||
["ex-patriot"],
|
||||
["expatriate"],
|
||||
"Use the correct term for someone living abroad.",
|
||||
"Fixes the misinterpretation of `expatriate`, ensuring the correct term is used for individuals residing abroad.",
|
||||
LintKind::Eggcorn
|
||||
),
|
||||
"FaceFirst" => (
|
||||
["face first into"],
|
||||
["face-first into"],
|
||||
|
|
@ -732,6 +739,13 @@ pub fn lint_group() -> LintGroup {
|
|||
"Corrects wrong variations of the idiomatic adjective `last-ditch`.",
|
||||
LintKind::Usage
|
||||
),
|
||||
"LastNight" => (
|
||||
["yesterday night"],
|
||||
["last night"],
|
||||
"The idiomatic phrase is `last night`.",
|
||||
"Flags `yesterday night` and suggests the standard phrasing `last night`.",
|
||||
LintKind::WordChoice
|
||||
),
|
||||
"LetAlone" => (
|
||||
["let along"],
|
||||
["let alone"],
|
||||
|
|
@ -756,7 +770,7 @@ pub fn lint_group() -> LintGroup {
|
|||
["low hanging fruit", "low hanging fruits", "low-hanging fruits"],
|
||||
["low-hanging fruit"],
|
||||
"The standard form is `low-hanging fruit` with a hyphen and singular form.",
|
||||
"Corrects non-standard variants of `low-hanging fruit`.",
|
||||
"Corrects nonstandard variants of `low-hanging fruit`.",
|
||||
LintKind::Usage
|
||||
),
|
||||
"ManagerialReins" => (
|
||||
|
|
@ -859,6 +873,13 @@ pub fn lint_group() -> LintGroup {
|
|||
"Corrects `ontop of` to `on top of`.",
|
||||
LintKind::BoundaryError
|
||||
),
|
||||
"PartsOfSpeech" => (
|
||||
["part of speeches", "parts of speeches"],
|
||||
["parts of speech"],
|
||||
"The correct plural is `parts of speech`.",
|
||||
"Corrects pluralizing the wrong noun in `part of speech`.",
|
||||
LintKind::Grammar
|
||||
),
|
||||
"PeaceOfMind" => (
|
||||
["piece of mind"],
|
||||
["peace of mind"],
|
||||
|
|
@ -874,7 +895,7 @@ pub fn lint_group() -> LintGroup {
|
|||
LintKind::Spelling
|
||||
),
|
||||
"PointsOfView" => (
|
||||
["point of views"],
|
||||
["point of views", "points of views"],
|
||||
["points of view"],
|
||||
"The correct plural is `points of view`.",
|
||||
"Corrects pluralizing the wrong noun in `point of view`.",
|
||||
|
|
@ -915,7 +936,7 @@ pub fn lint_group() -> LintGroup {
|
|||
["quite many"],
|
||||
["quite a few"],
|
||||
"Use `quite a few` instead of `quite many`.",
|
||||
"Corrects `quite many` to `quite a few`, which is the more natural and idiomatic phrase in standard English. `Quite many` is considered non-standard usage.",
|
||||
"Corrects `quite many` to `quite a few`, which is the more natural and idiomatic phrase in standard English. `Quite many` is considered nonstandard usage.",
|
||||
LintKind::Nonstandard
|
||||
),
|
||||
"RapidFire" => (
|
||||
|
|
@ -953,7 +974,7 @@ pub fn lint_group() -> LintGroup {
|
|||
LintKind::WordChoice
|
||||
),
|
||||
"RulesOfThumb" => (
|
||||
["rule of thumbs", "rule-of-thumbs"],
|
||||
["rule of thumbs", "rule-of-thumbs", "rules of thumbs"],
|
||||
["rules of thumb"],
|
||||
"The correct plural is `rules of thumb`.",
|
||||
"Corrects pluralizing the wrong noun in `rule of thumb`.",
|
||||
|
|
|
|||
|
|
@ -15,6 +15,24 @@ fn corrects_a_couple_of_more() {
|
|||
)
|
||||
}
|
||||
|
||||
// AdNauseam
|
||||
#[test]
|
||||
fn corrects_as_nauseam_1() {
|
||||
assert_suggestion_result(
|
||||
"As you say, discussed as nauseam, but no nearer a solution.",
|
||||
lint_group(),
|
||||
"As you say, discussed ad nauseam, but no nearer a solution.",
|
||||
);
|
||||
}
|
||||
#[test]
|
||||
fn corrects_as_nauseam_2() {
|
||||
assert_suggestion_result(
|
||||
"no more autism please, hearing about it as nauseam is starting to make me sick",
|
||||
lint_group(),
|
||||
"no more autism please, hearing about it ad nauseam is starting to make me sick",
|
||||
);
|
||||
}
|
||||
|
||||
// AfterAWhile
|
||||
#[test]
|
||||
fn correct_after_while() {
|
||||
|
|
@ -713,6 +731,16 @@ fn detect_ever_since() {
|
|||
);
|
||||
}
|
||||
|
||||
// EveryOnceAndAgain
|
||||
#[test]
|
||||
fn fix_every_once_and_again() {
|
||||
assert_suggestion_result(
|
||||
"Ys have been replaced with Ps, happens randomly every once and again with different letters",
|
||||
lint_group(),
|
||||
"Ys have been replaced with Ps, happens randomly every once in a while with different letters",
|
||||
);
|
||||
}
|
||||
|
||||
// EveryTime
|
||||
#[test]
|
||||
fn fix_everytime() {
|
||||
|
|
@ -770,9 +798,6 @@ fn expand_cuz() {
|
|||
// ExpandWithout
|
||||
// -none-
|
||||
|
||||
// Expatriate
|
||||
// -none-
|
||||
|
||||
// FaceFirst
|
||||
// -none-
|
||||
|
||||
|
|
@ -1105,6 +1130,48 @@ fn correct_last_ditch_space() {
|
|||
);
|
||||
}
|
||||
|
||||
// LastNight
|
||||
#[test]
|
||||
fn corrects_yesterday_night_basic() {
|
||||
assert_suggestion_result(
|
||||
"I was there yesterday night.",
|
||||
lint_group(),
|
||||
"I was there last night.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn corrects_yesterday_night_capitalized() {
|
||||
assert_suggestion_result(
|
||||
"Yesterday night was fun.",
|
||||
lint_group(),
|
||||
"Last night was fun.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn corrects_yesterday_night_with_comma() {
|
||||
assert_suggestion_result(
|
||||
"Yesterday night, we watched a movie.",
|
||||
lint_group(),
|
||||
"Last night, we watched a movie.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn corrects_yesterday_night_across_newline() {
|
||||
assert_suggestion_result(
|
||||
"They left yesterday\nnight after the show.",
|
||||
lint_group(),
|
||||
"They left last night after the show.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_lint_for_last_night_phrase() {
|
||||
assert_lint_count("I remember last night clearly.", lint_group(), 0);
|
||||
}
|
||||
|
||||
// LetAlone
|
||||
#[test]
|
||||
fn let_along() {
|
||||
|
|
@ -1434,6 +1501,26 @@ fn correct_on_top_of() {
|
|||
);
|
||||
}
|
||||
|
||||
// PartOfSpeech
|
||||
#[test]
|
||||
fn corrects_part_of_speeches() {
|
||||
assert_suggestion_result(
|
||||
"The part of speeches (POS) or as follows:",
|
||||
lint_group(),
|
||||
"The parts of speech (POS) or as follows:",
|
||||
)
|
||||
}
|
||||
|
||||
// It can connect different parts of speeches e.g noun to adjective, adjective to adverb, noun to verb etc.
|
||||
#[test]
|
||||
fn corrects_parts_of_speeches() {
|
||||
assert_suggestion_result(
|
||||
"It can connect different parts of speeches e.g noun to adjective, adjective to adverb, noun to verb etc.",
|
||||
lint_group(),
|
||||
"It can connect different parts of speech e.g noun to adjective, adjective to adverb, noun to verb etc.",
|
||||
)
|
||||
}
|
||||
|
||||
// PeaceOfMind
|
||||
#[test]
|
||||
fn corrects_piece_of_mind() {
|
||||
|
|
@ -1474,7 +1561,7 @@ fn corrects_per_say_hyphenated() {
|
|||
|
||||
// PointsOfView
|
||||
#[test]
|
||||
fn corrects_points_of_view() {
|
||||
fn corrects_point_of_views() {
|
||||
assert_suggestion_result(
|
||||
"This will produce a huge amount of raw data, representing the region in multiple point of views.",
|
||||
lint_group(),
|
||||
|
|
@ -1482,6 +1569,16 @@ fn corrects_points_of_view() {
|
|||
)
|
||||
}
|
||||
|
||||
// log events, places, moods and self-reflect from various points of views
|
||||
#[test]
|
||||
fn corrects_points_of_views() {
|
||||
assert_suggestion_result(
|
||||
"log events, places, moods and self-reflect from various points of views",
|
||||
lint_group(),
|
||||
"log events, places, moods and self-reflect from various points of view",
|
||||
)
|
||||
}
|
||||
|
||||
// PrayingMantis
|
||||
// -none-
|
||||
|
||||
|
|
@ -1539,7 +1636,7 @@ fn correct_iirc_correctly() {
|
|||
// RulesOfThumb
|
||||
|
||||
#[test]
|
||||
fn correct_rules_of_thumbs() {
|
||||
fn correct_rule_of_thumbs() {
|
||||
assert_suggestion_result(
|
||||
"Thanks. 0.2 is just from my rule of thumbs.",
|
||||
lint_group(),
|
||||
|
|
@ -1547,6 +1644,15 @@ fn correct_rules_of_thumbs() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_rules_of_thumbs() {
|
||||
assert_suggestion_result(
|
||||
"But as rules of thumbs, what is said in config file should be respected whatever parameter (field or directory) is passed to php-cs-fixer.phar.",
|
||||
lint_group(),
|
||||
"But as rules of thumb, what is said in config file should be respected whatever parameter (field or directory) is passed to php-cs-fixer.phar.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_rules_of_thumbs_hyphenated() {
|
||||
assert_suggestion_result(
|
||||
|
|
|
|||
|
|
@ -252,6 +252,33 @@ pub fn lint_group() -> LintGroup {
|
|||
"`Invest` is traditionally followed by 'in,' not `into.`",
|
||||
LintKind::Usage
|
||||
),
|
||||
|
||||
// General litotes (double negatives) → direct positive suggestions
|
||||
"LitotesDirectPositive" => (
|
||||
&[
|
||||
("not uncommon", "common"),
|
||||
("not unusual", "common"),
|
||||
("not insignificant", "significant"),
|
||||
("not unimportant", "important"),
|
||||
("not unlikely", "likely"),
|
||||
("not infrequent", "frequent"),
|
||||
("not inaccurate", "accurate"),
|
||||
("not unclear", "clear"),
|
||||
("not irrelevant", "relevant"),
|
||||
("not unpredictable", "predictable"),
|
||||
("not inadequate", "adequate"),
|
||||
("not unpleasant", "pleasant"),
|
||||
("not unreasonable", "reasonable"),
|
||||
("not impossible", "possible"),
|
||||
("more preferable", "preferable"),
|
||||
("not online", "offline"),
|
||||
("not offline", "online"),
|
||||
],
|
||||
"Consider the direct form.",
|
||||
"Offers direct-positive alternatives when double negatives might feel heavy.",
|
||||
LintKind::Style
|
||||
),
|
||||
|
||||
"MakeDoWith" => (
|
||||
&[
|
||||
("make due with", "make do with"),
|
||||
|
|
@ -262,6 +289,17 @@ pub fn lint_group() -> LintGroup {
|
|||
"Use `do` instead of `due` when referring to a resource scarcity.",
|
||||
"Corrects `make due` to `make do` when followed by `with`."
|
||||
),
|
||||
"MakeSense" => (
|
||||
&[
|
||||
("make senses", "make sense"),
|
||||
("made senses", "made sense"),
|
||||
("makes senses", "makes sense"),
|
||||
("making senses", "making sense")
|
||||
],
|
||||
"Use `sense` instead of `senses`.",
|
||||
"Corrects `make senses` to `make sense`.",
|
||||
LintKind::Usage
|
||||
),
|
||||
"MootPoint" => (
|
||||
&[
|
||||
("mute point", "moot point"),
|
||||
|
|
@ -299,33 +337,6 @@ pub fn lint_group() -> LintGroup {
|
|||
"Corrects the eggcorn `piggy bag` to `piggyback`, which is the proper term for riding on someone’s back or using an existing system.",
|
||||
LintKind::Eggcorn
|
||||
),
|
||||
|
||||
// General litotes (double negatives) → direct positive suggestions
|
||||
"LitotesDirectPositive" => (
|
||||
&[
|
||||
("not uncommon", "common"),
|
||||
("not unusual", "common"),
|
||||
("not insignificant", "significant"),
|
||||
("not unimportant", "important"),
|
||||
("not unlikely", "likely"),
|
||||
("not infrequent", "frequent"),
|
||||
("not inaccurate", "accurate"),
|
||||
("not unclear", "clear"),
|
||||
("not irrelevant", "relevant"),
|
||||
("not unpredictable", "predictable"),
|
||||
("not inadequate", "adequate"),
|
||||
("not unpleasant", "pleasant"),
|
||||
("not unreasonable", "reasonable"),
|
||||
("not impossible", "possible"),
|
||||
("more preferable", "preferable"),
|
||||
("not online", "offline"),
|
||||
("not offline", "online"),
|
||||
],
|
||||
"Consider the direct form.",
|
||||
"Offers direct-positive alternatives when double negatives might feel heavy.",
|
||||
LintKind::Style
|
||||
),
|
||||
|
||||
// Redundant degree modifiers on positives (double positives) → base form
|
||||
"RedundantSuperlatives" => (
|
||||
&[
|
||||
|
|
@ -338,6 +349,17 @@ pub fn lint_group() -> LintGroup {
|
|||
"Simplifies redundant double positives like `most optimal` to the base form.",
|
||||
LintKind::Redundancy
|
||||
),
|
||||
"WreakHavoc" => (
|
||||
&[
|
||||
("wreck havoc", "wreak havoc"),
|
||||
("wrecked havoc", "wreaked havoc"),
|
||||
("wrecking havoc", "wreaking havoc"),
|
||||
("wrecks havoc", "wreaks havoc"),
|
||||
],
|
||||
"Did you mean `wreak havoc`?",
|
||||
"Corrects the eggcorn `wreck havoc` to `wreak havoc`, which is the proper term for causing chaos or destruction.",
|
||||
LintKind::Eggcorn
|
||||
)
|
||||
});
|
||||
|
||||
add_many_to_many_mappings!(group, {
|
||||
|
|
@ -352,6 +374,37 @@ pub fn lint_group() -> LintGroup {
|
|||
"Suggests using either `await` or `wait for` but not both, as they express the same meaning.",
|
||||
LintKind::Redundancy
|
||||
),
|
||||
"Copyright" => (
|
||||
&[
|
||||
(&["copywrite"], &["copyright"]),
|
||||
(&["copywrites"], &["copyrights"]),
|
||||
(&["copywriting"], &["copyrighting"]),
|
||||
(&["copywritten", "copywrited", "copywrote"], &["copyrighted"]),
|
||||
],
|
||||
"Did you mean `copyright`? `Copywrite` means to write copy (advertising text), while `copyright` is the legal right to control use of creative works.",
|
||||
"Corrects `copywrite` to `copyright`. `Copywrite` refers to writing copy, while `copyright` is the legal right to creative works.",
|
||||
LintKind::WordChoice
|
||||
),
|
||||
"Expat" => (
|
||||
&[
|
||||
(&["ex-pat", "ex pat"], &["expat"]),
|
||||
(&["ex-pats", "ex pats"], &["expats"]),
|
||||
(&["ex-pat's", "ex pat's"], &["expat's"]),
|
||||
],
|
||||
"The correct spelling is `expat` with no hyphen or space.",
|
||||
"Corrects the mistake of writing `expat` as two words.",
|
||||
LintKind::Spelling
|
||||
),
|
||||
"Expatriate" => (
|
||||
&[
|
||||
(&["ex-patriot", "expatriot", "ex patriot"], &["expatriate"]),
|
||||
(&["ex-patriots", "expatriots", "ex patriots"], &["expatriates"]),
|
||||
(&["ex-patriot's", "expatriot's", "ex patriot's"], &["expatriate's"]),
|
||||
],
|
||||
"Use the correct term for someone living abroad.",
|
||||
"Fixes the misinterpretation of `expatriate`, ensuring the correct term is used for individuals residing abroad.",
|
||||
LintKind::Eggcorn
|
||||
),
|
||||
"GetRidOf" => (
|
||||
&[
|
||||
(&["get rid off", "get ride of", "get ride off"], &["get rid of"]),
|
||||
|
|
|
|||
|
|
@ -782,6 +782,44 @@ fn corrects_making_due_with() {
|
|||
);
|
||||
}
|
||||
|
||||
// MakeSense
|
||||
|
||||
#[test]
|
||||
fn fix_make_senses() {
|
||||
assert_suggestion_result(
|
||||
"some symbols make senses only if you have a certain keyboard",
|
||||
lint_group(),
|
||||
"some symbols make sense only if you have a certain keyboard",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_made_senses() {
|
||||
assert_suggestion_result(
|
||||
"Usually on the examples of matlab central I have found all with positive magnitude and made senses to me.",
|
||||
lint_group(),
|
||||
"Usually on the examples of matlab central I have found all with positive magnitude and made sense to me.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_makes_senses() {
|
||||
assert_suggestion_result(
|
||||
"If it makes senses I can open a PR.",
|
||||
lint_group(),
|
||||
"If it makes sense I can open a PR.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_making_senses() {
|
||||
assert_suggestion_result(
|
||||
"I appreciate you mentioned the two use cases, which are making senses for both.",
|
||||
lint_group(),
|
||||
"I appreciate you mentioned the two use cases, which are making sense for both.",
|
||||
);
|
||||
}
|
||||
|
||||
// MootPoint
|
||||
|
||||
// -point is mute-
|
||||
|
|
@ -832,8 +870,46 @@ fn correct_passer_bys_hyphen() {
|
|||
// Piggyback
|
||||
// -none-
|
||||
|
||||
// WreakHavoc
|
||||
|
||||
// Many to many tests
|
||||
|
||||
#[test]
|
||||
fn fix_wreck_havoc() {
|
||||
assert_suggestion_result(
|
||||
"Tables with a \".\" in the name wreck havoc with the system",
|
||||
lint_group(),
|
||||
"Tables with a \".\" in the name wreak havoc with the system",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_wrecked_havoc() {
|
||||
assert_suggestion_result(
|
||||
"It would have been some weird local configuration of LO that wrecked havoc.",
|
||||
lint_group(),
|
||||
"It would have been some weird local configuration of LO that wreaked havoc.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_wrecking_havoc() {
|
||||
assert_suggestion_result(
|
||||
"Multi-line edit is wrecking havoc with indention",
|
||||
lint_group(),
|
||||
"Multi-line edit is wreaking havoc with indention",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_wrecks_havoc() {
|
||||
assert_suggestion_result(
|
||||
"Small POC using rust with ptrace that wrecks havoc on msync",
|
||||
lint_group(),
|
||||
"Small POC using rust with ptrace that wreaks havoc on msync",
|
||||
);
|
||||
}
|
||||
|
||||
// AwaitFor
|
||||
|
||||
#[test]
|
||||
|
|
@ -888,6 +964,130 @@ fn correct_awaited_for() {
|
|||
);
|
||||
}
|
||||
|
||||
// Copyright
|
||||
|
||||
#[test]
|
||||
fn copywritten() {
|
||||
assert_suggestion_result(
|
||||
"Including digital copies of copywritten artwork with the project isn't advised.",
|
||||
lint_group(),
|
||||
"Including digital copies of copyrighted artwork with the project isn't advised.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn copywrites() {
|
||||
assert_suggestion_result(
|
||||
"Code is 99% copy/pasted from OpenSSH with an attempt to retain all copywrites",
|
||||
lint_group(),
|
||||
"Code is 99% copy/pasted from OpenSSH with an attempt to retain all copyrights",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn copywrited() {
|
||||
assert_suggestion_result(
|
||||
"Proprietary copywrited code",
|
||||
lint_group(),
|
||||
"Proprietary copyrighted code",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn copywrited_all_caps() {
|
||||
assert_suggestion_result(
|
||||
"URLS MAY CONTAIN COPYWRITED MATERIAL",
|
||||
lint_group(),
|
||||
"URLS MAY CONTAIN COPYRIGHTED MATERIAL",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn copywrote() {
|
||||
assert_suggestion_result(
|
||||
"How do you find out if someone copywrote a movie",
|
||||
lint_group(),
|
||||
"How do you find out if someone copyrighted a movie",
|
||||
);
|
||||
}
|
||||
|
||||
// Expat
|
||||
|
||||
#[test]
|
||||
fn correct_ex_pat_hyphen() {
|
||||
assert_suggestion_result(
|
||||
"It seems ex-pat means the person will be in a foreign country temporarily",
|
||||
lint_group(),
|
||||
"It seems expat means the person will be in a foreign country temporarily",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_ex_pats_hyphen() {
|
||||
assert_suggestion_result(
|
||||
"So, it might be correct to call most Brits ex-pats.",
|
||||
lint_group(),
|
||||
"So, it might be correct to call most Brits expats.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_ex_pat_space() {
|
||||
assert_suggestion_result(
|
||||
"For me, the term ex pat embodies the exquisite hypocrisy of certain people feeling entitled",
|
||||
lint_group(),
|
||||
"For me, the term expat embodies the exquisite hypocrisy of certain people feeling entitled",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore = "replace_with_match_case results in ExPats"]
|
||||
fn correct_ex_pats_space() {
|
||||
assert_suggestion_result(
|
||||
"Why are Brits who emigrate \"Ex Pats\" but people who come here \"immigrants\"?",
|
||||
lint_group(),
|
||||
"Why are Brits who emigrate \"Expats\" but people who come here \"immigrants\"?",
|
||||
);
|
||||
}
|
||||
|
||||
// Expatriate
|
||||
|
||||
#[test]
|
||||
fn correct_expatriot() {
|
||||
assert_suggestion_result(
|
||||
"Another expatriot of the era, James Joyce, also followed Papa's writing and drinking schedule.",
|
||||
lint_group(),
|
||||
"Another expatriate of the era, James Joyce, also followed Papa's writing and drinking schedule.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_expatriots() {
|
||||
assert_suggestion_result(
|
||||
"Expatriots, upon discovering the delightful nuances of Dutch pronunciation, often find themselves in stitches.",
|
||||
lint_group(),
|
||||
"Expatriates, upon discovering the delightful nuances of Dutch pronunciation, often find themselves in stitches.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_ex_patriot_hyphen() {
|
||||
assert_suggestion_result(
|
||||
"Then I added we should all be using the word 移民 immigrant, not ex-patriot, not 外国人 gaikokujin, and definitely not 外人 gaijin",
|
||||
lint_group(),
|
||||
"Then I added we should all be using the word 移民 immigrant, not expatriate, not 外国人 gaikokujin, and definitely not 外人 gaijin",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_ex_patriots_hyphen() {
|
||||
assert_suggestion_result(
|
||||
"Ex-patriots who move to Hong Kong to seek greener pastures and to experience a new culture seem to bring their own cultural baggage with them.",
|
||||
lint_group(),
|
||||
"Expatriates who move to Hong Kong to seek greener pastures and to experience a new culture seem to bring their own cultural baggage with them.",
|
||||
);
|
||||
}
|
||||
|
||||
// GetRidOf
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
use crate::TokenKind;
|
||||
use crate::expr::Expr;
|
||||
use crate::expr::SequenceExpr;
|
||||
use crate::{CharString, CharStringExt, Token, char_string::char_string, patterns::WordSet};
|
||||
|
|
@ -16,9 +17,10 @@ impl Default for PiqueInterest {
|
|||
"peak", "peaked", "peek", "peeked", "peeking", "peaking",
|
||||
]))
|
||||
.then_whitespace()
|
||||
.then(|tok: &Token, _: &[char]| {
|
||||
tok.kind.is_non_plural_nominal() || tok.kind.is_possessive_determiner()
|
||||
})
|
||||
.then_kind_either(
|
||||
TokenKind::is_non_plural_nominal,
|
||||
TokenKind::is_possessive_determiner,
|
||||
)
|
||||
.then_whitespace()
|
||||
.t_aco("interest");
|
||||
|
||||
|
|
|
|||
|
|
@ -23,14 +23,9 @@ where
|
|||
.then_kind_is_but_is_not(TokenKind::is_plural_nominal, TokenKind::is_singular_nominal)
|
||||
.t_ws()
|
||||
.then(UPOSSet::new(&[UPOS::NOUN, UPOS::PROPN]))
|
||||
.then_optional(SequenceExpr::default().t_any().t_any());
|
||||
.then_optional(SequenceExpr::anything().t_any());
|
||||
|
||||
let additional_req = SequenceExpr::default()
|
||||
.t_any()
|
||||
.t_any()
|
||||
.t_any()
|
||||
.t_any()
|
||||
.then_noun();
|
||||
let additional_req = SequenceExpr::anything().t_any().t_any().t_any().then_noun();
|
||||
|
||||
let exceptions = SequenceExpr::default()
|
||||
.then_unless(|tok: &Token, _: &[char]| tok.kind.is_demonstrative_determiner())
|
||||
|
|
|
|||
|
|
@ -13,12 +13,12 @@ pub struct PronounAre {
|
|||
impl Default for PronounAre {
|
||||
fn default() -> Self {
|
||||
let expr = SequenceExpr::default()
|
||||
.then(|tok: &Token, _src: &[char]| {
|
||||
tok.kind.is_pronoun()
|
||||
&& tok.kind.is_subject_pronoun()
|
||||
&& (tok.kind.is_second_person_pronoun()
|
||||
|| tok.kind.is_first_person_plural_pronoun()
|
||||
|| tok.kind.is_third_person_plural_pronoun())
|
||||
.then_kind_where(|kind| {
|
||||
kind.is_pronoun()
|
||||
&& kind.is_subject_pronoun()
|
||||
&& (kind.is_second_person_pronoun()
|
||||
|| kind.is_first_person_plural_pronoun()
|
||||
|| kind.is_third_person_plural_pronoun())
|
||||
})
|
||||
.t_ws()
|
||||
.t_aco("r");
|
||||
|
|
|
|||
|
|
@ -52,9 +52,8 @@ impl PronounInflectionBe {
|
|||
map.insert(arent, "isn't");
|
||||
|
||||
let is = SequenceExpr::default()
|
||||
.then(|tok: &Token, _: &[char]| {
|
||||
tok.kind
|
||||
.as_word()
|
||||
.then_kind_where(|kind| {
|
||||
kind.as_word()
|
||||
.as_ref()
|
||||
.and_then(|m| m.as_ref().and_then(|m| m.np_member))
|
||||
.unwrap_or_default()
|
||||
|
|
|
|||
|
|
@ -27,9 +27,9 @@ impl Default for QuiteQuiet {
|
|||
if !tok.kind.is_verb() || !tok.kind.is_apostrophized() {
|
||||
return false;
|
||||
}
|
||||
let chars = tok.span.get_content(src);
|
||||
chars.ends_with_ignore_ascii_case_chars(&['n', '\'', 't'])
|
||||
|| chars.ends_with_ignore_ascii_case_chars(&['n', '’', 't'])
|
||||
tok.span
|
||||
.get_content(src)
|
||||
.ends_with_any_ignore_ascii_case_chars(&[&['n', '\'', 't'], &['n', '’', 't']])
|
||||
})
|
||||
.t_ws()
|
||||
.t_aco("quiet");
|
||||
|
|
|
|||
354
harper-core/src/linting/redundant_acronyms.rs
Normal file
354
harper-core/src/linting/redundant_acronyms.rs
Normal file
|
|
@ -0,0 +1,354 @@
|
|||
use crate::{
|
||||
CharStringExt, Token,
|
||||
expr::{Expr, FirstMatchOf, SequenceExpr},
|
||||
linting::{ExprLinter, Lint, LintKind, Suggestion, expr_linter::Chunk},
|
||||
patterns::Word,
|
||||
token_string_ext::TokenStringExt,
|
||||
};
|
||||
|
||||
// (acronym, first_words, last_word)
|
||||
const ACRONYMS: &[(&str, &[&str], &str)] = &[
|
||||
("ATM", &["automated teller", "automatic teller"], "machine"),
|
||||
("GUI", &["graphical user"], "interface"),
|
||||
("LCD", &["liquid crystal"], "display"),
|
||||
// Note: "pin number" (not capitalized) is used to refer to GPIO pins etc.
|
||||
("PIN", &["personal identification"], "number"),
|
||||
("TUI", &["text-based user", "terminal user"], "interface"),
|
||||
("UI", &["user"], "interface"),
|
||||
("VIN", &["vehicle identification"], "number"),
|
||||
];
|
||||
|
||||
pub struct RedundantAcronyms {
|
||||
expr: Box<dyn Expr>,
|
||||
}
|
||||
|
||||
impl Default for RedundantAcronyms {
|
||||
fn default() -> Self {
|
||||
let exprs: Vec<Box<dyn Expr>> = ACRONYMS
|
||||
.iter()
|
||||
.map(|&(acronym, _, last_str)| {
|
||||
let last_string = last_str.to_string();
|
||||
Box::new(SequenceExpr::aco(acronym).t_ws().then_any_of(vec![
|
||||
Box::new(Word::new(last_str)),
|
||||
Box::new(move |t: &Token, src: &[char]| {
|
||||
t.span
|
||||
.get_content(src)
|
||||
.eq_ignore_ascii_case_str(&format!("{last_string}s"))
|
||||
}),
|
||||
])) as Box<dyn Expr>
|
||||
})
|
||||
.collect();
|
||||
|
||||
Self {
|
||||
expr: Box::new(FirstMatchOf::new(exprs)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprLinter for RedundantAcronyms {
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, toks: &[Token], src: &[char]) -> Option<Lint> {
|
||||
let last_word_span = toks.last()?.span;
|
||||
let last_word_chars = last_word_span.get_content(src);
|
||||
let acronym_str = toks.first()?.span.get_content_string(src);
|
||||
|
||||
// "pin number" (lowercase) is used to refer to the pins on microchips, etc.
|
||||
if acronym_str.eq_ignore_ascii_case("PIN") && acronym_str != "PIN" {
|
||||
return None;
|
||||
}
|
||||
|
||||
let (_, middle_words, _) = ACRONYMS
|
||||
.iter()
|
||||
.find(|(a, _, _)| (*a).eq_ignore_ascii_case(&acronym_str))?;
|
||||
|
||||
let is_all_caps = last_word_chars
|
||||
.iter()
|
||||
.all(|c| c.is_ascii_alphabetic() && c.is_ascii_uppercase());
|
||||
|
||||
let plural_ending = last_word_chars
|
||||
.last()
|
||||
.filter(|&&c| c.eq_ignore_ascii_case(&'s'))
|
||||
.map(|c| c.to_string())
|
||||
.unwrap_or_default();
|
||||
|
||||
let suggestions: Vec<Suggestion> = std::iter::once(Suggestion::ReplaceWith(
|
||||
format!("{acronym_str}{plural_ending}").chars().collect(),
|
||||
))
|
||||
.chain(middle_words.iter().map(|mw| {
|
||||
let middle_words = if is_all_caps {
|
||||
mw.to_ascii_uppercase()
|
||||
} else {
|
||||
mw.to_string()
|
||||
};
|
||||
Suggestion::ReplaceWith(
|
||||
format!("{middle_words} {}", last_word_span.get_content_string(src))
|
||||
.chars()
|
||||
.collect(),
|
||||
)
|
||||
}))
|
||||
.collect();
|
||||
|
||||
Some(Lint {
|
||||
span: toks.span()?,
|
||||
lint_kind: LintKind::Redundancy,
|
||||
suggestions,
|
||||
message: "The acronym's last letter already stands for the last word. Use just the acronym or the full phrase.".to_string(),
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Identifies redundant acronyms where the last word repeats the last letter's meaning (e.g., `ATM machine` → `ATM` or `automated teller machine`)."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::RedundantAcronyms;
|
||||
use crate::linting::tests::{assert_good_and_bad_suggestions, assert_no_lints};
|
||||
|
||||
#[test]
|
||||
fn test_made_up() {
|
||||
assert_good_and_bad_suggestions(
|
||||
"I forgot my PIN number!",
|
||||
RedundantAcronyms::default(),
|
||||
&[
|
||||
"I forgot my PIN!",
|
||||
"I forgot my personal identification number!",
|
||||
],
|
||||
&[],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_all_caps_singular() {
|
||||
assert_good_and_bad_suggestions(
|
||||
"CAN TWO CARS HAVE THE SAME VIN NUMBER?",
|
||||
RedundantAcronyms::default(),
|
||||
&[
|
||||
"CAN TWO CARS HAVE THE SAME VIN?",
|
||||
"CAN TWO CARS HAVE THE SAME VEHICLE IDENTIFICATION NUMBER?",
|
||||
],
|
||||
&[],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_all_caps_plural() {
|
||||
assert_good_and_bad_suggestions(
|
||||
"THESE ATM MACHINES ALL HAVE HIGH FEES!",
|
||||
RedundantAcronyms::default(),
|
||||
&[
|
||||
"THESE ATMS ALL HAVE HIGH FEES!",
|
||||
"THESE AUTOMATED TELLER MACHINES ALL HAVE HIGH FEES!",
|
||||
],
|
||||
&[],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_all_lowercase_singular() {
|
||||
assert_good_and_bad_suggestions(
|
||||
"the atm machine at my card",
|
||||
RedundantAcronyms::default(),
|
||||
&[
|
||||
"the atm at my card",
|
||||
"the automated teller machine at my card",
|
||||
],
|
||||
&[],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_all_lowercase_plural() {
|
||||
assert_good_and_bad_suggestions(
|
||||
"gui interfaces were sooo trendy in 1984!",
|
||||
RedundantAcronyms::default(),
|
||||
&[
|
||||
"guis were sooo trendy in 1984!",
|
||||
"graphical user interfaces were sooo trendy in 1984!",
|
||||
],
|
||||
&[],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_atm_machine() {
|
||||
assert_good_and_bad_suggestions(
|
||||
"Developed an ATM machine application for Raspberry Pi",
|
||||
RedundantAcronyms::default(),
|
||||
&[
|
||||
"Developed an ATM application for Raspberry Pi",
|
||||
"Developed an automatic teller machine application for Raspberry Pi",
|
||||
"Developed an automated teller machine application for Raspberry Pi",
|
||||
],
|
||||
&[],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_atm_machines() {
|
||||
assert_good_and_bad_suggestions(
|
||||
"ATM machines allow 4 or 6 digit PIN codes",
|
||||
RedundantAcronyms::default(),
|
||||
&[
|
||||
"ATMs allow 4 or 6 digit PIN codes",
|
||||
"automated teller machines allow 4 or 6 digit PIN codes",
|
||||
"automatic teller machines allow 4 or 6 digit PIN codes",
|
||||
],
|
||||
&[],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_gui_interface() {
|
||||
assert_good_and_bad_suggestions(
|
||||
"This project develops using java language with GUI interface.",
|
||||
RedundantAcronyms::default(),
|
||||
&[
|
||||
"This project develops using java language with GUI.",
|
||||
"This project develops using java language with graphical user interface.",
|
||||
],
|
||||
&[],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_gui_interfaces() {
|
||||
assert_good_and_bad_suggestions(
|
||||
"In non-crafting GUI interfaces, such as a mod's own recipe tree, the shortcut key cannot be used to view item usage or crafting methods.",
|
||||
RedundantAcronyms::default(),
|
||||
&[
|
||||
"In non-crafting GUIs, such as a mod's own recipe tree, the shortcut key cannot be used to view item usage or crafting methods.",
|
||||
"In non-crafting graphical user interfaces, such as a mod's own recipe tree, the shortcut key cannot be used to view item usage or crafting methods.",
|
||||
],
|
||||
&[],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_lcd_display() {
|
||||
assert_good_and_bad_suggestions(
|
||||
"This function accepts I2C shield address for LCD display, number of columns, rows and dot size",
|
||||
RedundantAcronyms::default(),
|
||||
&[
|
||||
"This function accepts I2C shield address for LCD, number of columns, rows and dot size",
|
||||
"This function accepts I2C shield address for liquid crystal display, number of columns, rows and dot size",
|
||||
],
|
||||
&[],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_lcd_displays() {
|
||||
assert_good_and_bad_suggestions(
|
||||
"ScreenUi makes it easy to build simple or complex character based user interfaces on small LCD displays like those commonly used with Arduinos.",
|
||||
RedundantAcronyms::default(),
|
||||
&[
|
||||
"ScreenUi makes it easy to build simple or complex character based user interfaces on small LCDs like those commonly used with Arduinos.",
|
||||
"ScreenUi makes it easy to build simple or complex character based user interfaces on small liquid crystal displays like those commonly used with Arduinos.",
|
||||
],
|
||||
&[],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_pin_numbers_caps() {
|
||||
assert_good_and_bad_suggestions(
|
||||
"Randomly generating PIN numbers for ATM access.",
|
||||
RedundantAcronyms::default(),
|
||||
&[
|
||||
"Randomly generating PINs for ATM access.",
|
||||
"Randomly generating personal identification numbers for ATM access.",
|
||||
],
|
||||
&[],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_pin_number_all_caps() {
|
||||
assert_good_and_bad_suggestions(
|
||||
"DON'T LET ANYONE SEE YOUR PIN NUMBER",
|
||||
RedundantAcronyms::default(),
|
||||
&[
|
||||
"DON'T LET ANYONE SEE YOUR PIN",
|
||||
"DON'T LET ANYONE SEE YOUR PERSONAL IDENTIFICATION NUMBER",
|
||||
],
|
||||
&[],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dont_correct_pin_number_lowercase() {
|
||||
assert_no_lints(
|
||||
"GPIO 26 (pin 37) on the Pi4 is mapped to pin nummer GPIO 425 on the pi5",
|
||||
RedundantAcronyms::default(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dont_correct_pin_number_titlecase() {
|
||||
assert_no_lints(
|
||||
"Pin Number Match Project in Javascript.",
|
||||
RedundantAcronyms::default(),
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_tui_interface() {
|
||||
assert_good_and_bad_suggestions(
|
||||
"Could a history search TUI interface be added for xonsh?",
|
||||
RedundantAcronyms::default(),
|
||||
&[
|
||||
"Could a history search TUI be added for xonsh?",
|
||||
"Could a history search text-based user interface be added for xonsh?",
|
||||
"Could a history search terminal user interface be added for xonsh?",
|
||||
],
|
||||
&[],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_ui_interface() {
|
||||
assert_good_and_bad_suggestions(
|
||||
"call ESPUI.begin(\"Some Title\"); to start the UI interface",
|
||||
RedundantAcronyms::default(),
|
||||
&[
|
||||
"call ESPUI.begin(\"Some Title\"); to start the UI",
|
||||
"call ESPUI.begin(\"Some Title\"); to start the user interface",
|
||||
],
|
||||
&[],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_vin_numbers() {
|
||||
assert_good_and_bad_suggestions(
|
||||
"That was actually accurate in decoding the VIN numbers but it costed me a 1000 USD.",
|
||||
RedundantAcronyms::default(),
|
||||
&[
|
||||
"That was actually accurate in decoding the VINs but it costed me a 1000 USD.",
|
||||
"That was actually accurate in decoding the vehicle identification numbers but it costed me a 1000 USD.",
|
||||
],
|
||||
&[],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn correct_vin_number() {
|
||||
assert_good_and_bad_suggestions(
|
||||
"we have implemented verification algorithms, which ensure that VIN number received is correct",
|
||||
RedundantAcronyms::default(),
|
||||
&[
|
||||
"we have implemented verification algorithms, which ensure that VIN received is correct",
|
||||
"we have implemented verification algorithms, which ensure that vehicle identification number received is correct",
|
||||
],
|
||||
&[],
|
||||
);
|
||||
}
|
||||
}
|
||||
180
harper-core/src/linting/respond.rs
Normal file
180
harper-core/src/linting/respond.rs
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use crate::Token;
|
||||
use crate::expr::{Expr, ExprMap, SequenceExpr};
|
||||
use crate::linting::expr_linter::Chunk;
|
||||
use crate::linting::{ExprLinter, Lint, LintKind, Suggestion};
|
||||
use crate::patterns::Word;
|
||||
|
||||
pub struct Respond {
|
||||
expr: Box<dyn Expr>,
|
||||
map: Arc<ExprMap<usize>>,
|
||||
}
|
||||
|
||||
impl Default for Respond {
|
||||
fn default() -> Self {
|
||||
let mut map = ExprMap::default();
|
||||
|
||||
let helper_verb = |tok: &Token, src: &[char]| {
|
||||
if tok.kind.is_auxiliary_verb() {
|
||||
return true;
|
||||
}
|
||||
|
||||
if !tok.kind.is_verb() {
|
||||
return false;
|
||||
}
|
||||
|
||||
let lower = tok.span.get_content_string(src).to_lowercase();
|
||||
matches!(
|
||||
lower.as_str(),
|
||||
"do" | "did" | "does" | "won't" | "don't" | "didn't" | "doesn't"
|
||||
)
|
||||
};
|
||||
|
||||
map.insert(
|
||||
SequenceExpr::default()
|
||||
.then_nominal()
|
||||
.t_ws()
|
||||
.then(helper_verb)
|
||||
.t_ws()
|
||||
.then(Word::new("response")),
|
||||
4,
|
||||
);
|
||||
|
||||
map.insert(
|
||||
SequenceExpr::default()
|
||||
.then_nominal()
|
||||
.t_ws()
|
||||
.then(helper_verb)
|
||||
.t_ws()
|
||||
.then_adverb()
|
||||
.t_ws()
|
||||
.then(Word::new("response")),
|
||||
6,
|
||||
);
|
||||
|
||||
let map = Arc::new(map);
|
||||
|
||||
Self {
|
||||
expr: Box::new(map.clone()),
|
||||
map,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprLinter for Respond {
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
|
||||
let response_index = *self.map.lookup(0, matched_tokens, source)?;
|
||||
let response_token = matched_tokens.get(response_index)?;
|
||||
|
||||
Some(Lint {
|
||||
span: response_token.span,
|
||||
lint_kind: LintKind::WordChoice,
|
||||
suggestions: vec![Suggestion::replace_with_match_case_str(
|
||||
"respond",
|
||||
response_token.span.get_content(source),
|
||||
)],
|
||||
message: "Use the verb `respond` here.".to_owned(),
|
||||
priority: 40,
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Flags uses of the noun `response` where the verb `respond` is needed after an auxiliary."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::Respond;
|
||||
use crate::linting::tests::{assert_lint_count, assert_no_lints, assert_suggestion_result};
|
||||
|
||||
#[test]
|
||||
fn fixes_will_response() {
|
||||
assert_suggestion_result(
|
||||
"He will response soon.",
|
||||
Respond::default(),
|
||||
"He will respond soon.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fixes_can_response() {
|
||||
assert_suggestion_result(
|
||||
"They can response to the survey.",
|
||||
Respond::default(),
|
||||
"They can respond to the survey.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fixes_did_not_response() {
|
||||
assert_suggestion_result(
|
||||
"I did not response yesterday.",
|
||||
Respond::default(),
|
||||
"I did not respond yesterday.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fixes_might_quickly_response() {
|
||||
assert_suggestion_result(
|
||||
"She might quickly response to feedback.",
|
||||
Respond::default(),
|
||||
"She might quickly respond to feedback.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fixes_wont_response() {
|
||||
assert_suggestion_result(
|
||||
"They won't response in time.",
|
||||
Respond::default(),
|
||||
"They won't respond in time.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fixes_would_response() {
|
||||
assert_suggestion_result(
|
||||
"We would response if we could.",
|
||||
Respond::default(),
|
||||
"We would respond if we could.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fixes_should_response() {
|
||||
assert_suggestion_result(
|
||||
"You should response politely.",
|
||||
Respond::default(),
|
||||
"You should respond politely.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn does_not_flag_correct_respond() {
|
||||
assert_no_lints("Please respond when you can.", Respond::default());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn does_not_flag_noun_use() {
|
||||
assert_no_lints("The response time was great.", Respond::default());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn does_not_flag_question_subject() {
|
||||
assert_lint_count("Should response times be logged?", Respond::default(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn does_not_flag_response_as_object() {
|
||||
assert_no_lints("I have no response for that.", Respond::default());
|
||||
}
|
||||
}
|
||||
164
harper-core/src/linting/right_click.rs
Normal file
164
harper-core/src/linting/right_click.rs
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use crate::{
|
||||
Token, TokenStringExt,
|
||||
expr::{Expr, ExprMap, SequenceExpr},
|
||||
linting::expr_linter::Chunk,
|
||||
linting::{ExprLinter, Lint, LintKind, Suggestion},
|
||||
patterns::DerivedFrom,
|
||||
};
|
||||
|
||||
pub struct RightClick {
|
||||
expr: Box<dyn Expr>,
|
||||
map: Arc<ExprMap<usize>>,
|
||||
}
|
||||
|
||||
impl Default for RightClick {
|
||||
fn default() -> Self {
|
||||
let mut map = ExprMap::default();
|
||||
|
||||
map.insert(
|
||||
SequenceExpr::default()
|
||||
.then_word_set(&["right", "left", "middle"])
|
||||
.t_ws()
|
||||
.then(DerivedFrom::new_from_str("click")),
|
||||
0,
|
||||
);
|
||||
|
||||
let map = Arc::new(map);
|
||||
|
||||
Self {
|
||||
expr: Box::new(map.clone()),
|
||||
map,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprLinter for RightClick {
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
|
||||
let start_idx = *self.map.lookup(0, matched_tokens, source)?;
|
||||
let click_idx = matched_tokens.len().checked_sub(1)?;
|
||||
let span = matched_tokens.get(start_idx..=click_idx)?.span()?;
|
||||
let template = span.get_content(source);
|
||||
|
||||
let direction = matched_tokens.get(start_idx)?.span.get_content(source);
|
||||
let click = matched_tokens.get(click_idx)?.span.get_content(source);
|
||||
|
||||
let replacement: Vec<char> = direction
|
||||
.iter()
|
||||
.copied()
|
||||
.chain(['-'])
|
||||
.chain(click.iter().copied())
|
||||
.collect();
|
||||
|
||||
Some(Lint {
|
||||
span,
|
||||
lint_kind: LintKind::Punctuation,
|
||||
suggestions: vec![Suggestion::replace_with_match_case(replacement, template)],
|
||||
message: "Hyphenate this mouse command.".to_owned(),
|
||||
priority: 40,
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Hyphenates right-click style mouse commands."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::RightClick;
|
||||
use crate::linting::tests::{assert_lint_count, assert_suggestion_result};
|
||||
|
||||
#[test]
|
||||
fn hyphenates_basic_command() {
|
||||
assert_suggestion_result(
|
||||
"Right click the icon.",
|
||||
RightClick::default(),
|
||||
"Right-click the icon.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hyphenates_with_preposition() {
|
||||
assert_suggestion_result(
|
||||
"Please right click on the link.",
|
||||
RightClick::default(),
|
||||
"Please right-click on the link.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hyphenates_past_tense() {
|
||||
assert_suggestion_result(
|
||||
"They right clicked the submit button.",
|
||||
RightClick::default(),
|
||||
"They right-clicked the submit button.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hyphenates_gerund() {
|
||||
assert_suggestion_result(
|
||||
"Right clicking the item highlights it.",
|
||||
RightClick::default(),
|
||||
"Right-clicking the item highlights it.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hyphenates_plural_noun() {
|
||||
assert_suggestion_result(
|
||||
"Right clicks are tracked in the log.",
|
||||
RightClick::default(),
|
||||
"Right-clicks are tracked in the log.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hyphenates_all_caps() {
|
||||
assert_suggestion_result(
|
||||
"He RIGHT CLICKED the file.",
|
||||
RightClick::default(),
|
||||
"He RIGHT-CLICKED the file.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hyphenates_left_click() {
|
||||
assert_suggestion_result(
|
||||
"Left click the checkbox.",
|
||||
RightClick::default(),
|
||||
"Left-click the checkbox.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hyphenates_middle_click() {
|
||||
assert_suggestion_result(
|
||||
"Middle click to open in a new tab.",
|
||||
RightClick::default(),
|
||||
"Middle-click to open in a new tab.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allows_hyphenated_form() {
|
||||
assert_lint_count("Right-click the icon.", RightClick::default(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignores_unrelated_right_and_click() {
|
||||
assert_lint_count(
|
||||
"Click the right button to continue.",
|
||||
RightClick::default(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -84,7 +84,7 @@ impl ExprLinter for ShootOneselfInTheFoot {
|
|||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Corrects non-standard variants of 'shoot oneself in the foot'."
|
||||
"Corrects nonstandard variants of 'shoot oneself in the foot'."
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,82 +1,12 @@
|
|||
use crate::linting::expr_linter::Chunk;
|
||||
use crate::{
|
||||
Token,
|
||||
char_string::CharStringExt,
|
||||
expr::{All, Expr, FirstMatchOf, SequenceExpr},
|
||||
irregular_verbs::IrregularVerbs,
|
||||
linting::{ExprLinter, Lint, LintKind, Suggestion},
|
||||
patterns::{InflectionOfBe, WordSet},
|
||||
};
|
||||
|
||||
/// Maps common irregular verbs between their simple past and past participle forms.
|
||||
const IRREGULAR_VERBS: &[(&str, &str)] = &[
|
||||
("arose", "arisen"),
|
||||
("ate", "eaten"),
|
||||
("awoke", "awoken"),
|
||||
("bade", "bidden"),
|
||||
("became", "become"),
|
||||
("began", "begun"),
|
||||
("bit", "bitten"),
|
||||
("blew", "blown"),
|
||||
("bought", "bought"),
|
||||
("brang", "brung"),
|
||||
("broke", "broken"),
|
||||
("brought", "brought"),
|
||||
("came", "come"),
|
||||
("chose", "chosen"),
|
||||
("did", "done"),
|
||||
("drank", "drunk"),
|
||||
("drove", "driven"),
|
||||
("fell", "fallen"),
|
||||
("felt", "felt"),
|
||||
("flew", "flown"),
|
||||
("forgot", "forgotten"),
|
||||
("forwent", "forgone"),
|
||||
("gave", "given"),
|
||||
("grew", "grown"),
|
||||
("had", "had"),
|
||||
("heard", "heard"),
|
||||
("hit", "hit"),
|
||||
("input", "input"),
|
||||
("knew", "known"),
|
||||
("led", "led"),
|
||||
("mistook", "mistaken"),
|
||||
("output", "output"),
|
||||
("overtook", "overtaken"),
|
||||
("paid", "paid"),
|
||||
("partook", "partaken"),
|
||||
// proved, proved/proven
|
||||
("put", "put"),
|
||||
("ran", "run"),
|
||||
("rang", "rung"),
|
||||
("read", "read"),
|
||||
("reset", "reset"),
|
||||
("rode", "ridden"),
|
||||
("rose", "risen"),
|
||||
("sang", "sung"),
|
||||
("sank", "sunken"),
|
||||
("saw", "seen"),
|
||||
("set", "set"),
|
||||
("sewed", "sewn"),
|
||||
("slew", "slain"),
|
||||
("slid", "slid"),
|
||||
("spoke", "spoken"),
|
||||
("sprang", "sprung"),
|
||||
("stank", "stunk"),
|
||||
("stole", "stolen"),
|
||||
("stood", "stood"),
|
||||
("swam", "swum"),
|
||||
("swore", "sworn"),
|
||||
("thought", "thought"),
|
||||
("trod", "trodden"),
|
||||
("took", "taken"),
|
||||
// was, been
|
||||
// were, been
|
||||
("went", "gone"),
|
||||
("woke", "woken"),
|
||||
("wove", "woven"),
|
||||
("wrote", "written"),
|
||||
];
|
||||
|
||||
/// Corrects simple past tense verbs to past participle after auxiliary verbs like "have" or "be".
|
||||
pub struct SimplePastToPastParticiple {
|
||||
expr: Box<dyn Expr>,
|
||||
|
|
@ -141,41 +71,32 @@ impl ExprLinter for SimplePastToPastParticiple {
|
|||
|
||||
let verb_tok = &toks[2];
|
||||
|
||||
let verb_ch = verb_tok.span.get_content(src);
|
||||
if !IRREGULAR_VERBS
|
||||
.iter()
|
||||
.any(|(t, p)| verb_ch.eq_ignore_ascii_case_str(t) && p != t)
|
||||
let simple_past = verb_tok.span.get_content_string(src);
|
||||
|
||||
if let Some(past_participle) = IrregularVerbs::curated()
|
||||
.get_past_participle_for_preterite(&simple_past)
|
||||
.filter(|pp| pp != &simple_past)
|
||||
{
|
||||
return None;
|
||||
}
|
||||
let suggestions = vec![Suggestion::replace_with_match_case(
|
||||
past_participle.chars().collect(),
|
||||
verb_tok.span.get_content(src),
|
||||
)];
|
||||
|
||||
let (simple_past, past_participle) = IRREGULAR_VERBS
|
||||
.iter()
|
||||
.find(|(simple_past, _)| {
|
||||
verb_tok
|
||||
.span
|
||||
.get_content(src)
|
||||
.eq_ignore_ascii_case_str(simple_past)
|
||||
let message = format!(
|
||||
"Use the past participle `{}` instead of `{}` when using compound tenses or passive voice.",
|
||||
past_participle, simple_past
|
||||
);
|
||||
|
||||
Some(Lint {
|
||||
span: verb_tok.span,
|
||||
lint_kind: LintKind::Grammar,
|
||||
suggestions,
|
||||
message,
|
||||
..Default::default()
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let suggestions = vec![Suggestion::replace_with_match_case(
|
||||
past_participle.chars().collect(),
|
||||
verb_tok.span.get_content(src),
|
||||
)];
|
||||
|
||||
let message = format!(
|
||||
"Use the past participle `{}` instead of `{}` when using compound tenses or passive voice.",
|
||||
past_participle, simple_past
|
||||
);
|
||||
|
||||
Some(Lint {
|
||||
span: verb_tok.span,
|
||||
lint_kind: LintKind::Grammar,
|
||||
suggestions,
|
||||
message,
|
||||
..Default::default()
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
use crate::expr::{DurationExpr, Expr, LongestMatchOf, SequenceExpr};
|
||||
use crate::{Lrc, Token, TokenStringExt};
|
||||
use crate::expr::{DurationExpr, Expr, SequenceExpr};
|
||||
use crate::{CharStringExt, Token, TokenStringExt};
|
||||
|
||||
use super::{ExprLinter, Lint, LintKind, Suggestion};
|
||||
use crate::linting::expr_linter::Chunk;
|
||||
|
|
@ -25,23 +25,18 @@ pub struct SinceDuration {
|
|||
|
||||
impl Default for SinceDuration {
|
||||
fn default() -> Self {
|
||||
let pattern_without_ago = Lrc::new(
|
||||
SequenceExpr::default()
|
||||
.then_any_capitalization_of("since")
|
||||
.then_whitespace()
|
||||
.then(DurationExpr),
|
||||
);
|
||||
|
||||
let pattern_with_ago = SequenceExpr::default()
|
||||
.then(pattern_without_ago.clone())
|
||||
.then_whitespace()
|
||||
.then_any_capitalization_of("ago");
|
||||
|
||||
Self {
|
||||
expr: Box::new(LongestMatchOf::new(vec![
|
||||
Box::new(pattern_without_ago),
|
||||
Box::new(pattern_with_ago),
|
||||
])),
|
||||
expr: Box::new(
|
||||
SequenceExpr::default()
|
||||
.then_any_capitalization_of("since")
|
||||
.then_whitespace()
|
||||
.then(DurationExpr)
|
||||
.then_optional(
|
||||
SequenceExpr::default()
|
||||
.t_ws()
|
||||
.then_word_set(&["ago", "old"]),
|
||||
),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -55,7 +50,11 @@ impl ExprLinter for SinceDuration {
|
|||
|
||||
fn match_to_lint(&self, toks: &[Token], src: &[char]) -> Option<Lint> {
|
||||
let last = toks.last()?;
|
||||
if last.span.get_content_string(src).to_lowercase() == "ago" {
|
||||
if last
|
||||
.span
|
||||
.get_content(src)
|
||||
.eq_any_ignore_ascii_case_chars(&[&['a', 'g', 'o'], &['o', 'l', 'd']])
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
|
|
@ -94,7 +93,9 @@ impl ExprLinter for SinceDuration {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::SinceDuration;
|
||||
use crate::linting::tests::{assert_lint_count, assert_top3_suggestion_result};
|
||||
use crate::linting::tests::{
|
||||
assert_lint_count, assert_no_lints, assert_top3_suggestion_result,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn catches_spelled() {
|
||||
|
|
@ -107,10 +108,9 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn permits_spelled_with_ago() {
|
||||
assert_lint_count(
|
||||
assert_no_lints(
|
||||
"I have been waiting since two hours ago.",
|
||||
SinceDuration::default(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -125,10 +125,9 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn permits_numerals_with_ago() {
|
||||
assert_lint_count(
|
||||
assert_no_lints(
|
||||
"I have been waiting since 2 hours ago.",
|
||||
SinceDuration::default(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -287,4 +286,12 @@ mod tests {
|
|||
"I use a Wacom Cintiq 27QHDT for several years on Linux",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignore_since_years_old() {
|
||||
assert_no_lints(
|
||||
"I've been coding since 11 years old and I'm now 57",
|
||||
SinceDuration::default(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
240
harper-core/src/linting/soon_to_be.rs
Normal file
240
harper-core/src/linting/soon_to_be.rs
Normal file
|
|
@ -0,0 +1,240 @@
|
|||
use std::{ops::Range, sync::Arc};
|
||||
|
||||
use crate::{
|
||||
Token, TokenKind, TokenStringExt,
|
||||
expr::{Expr, ExprMap, SequenceExpr},
|
||||
linting::expr_linter::Chunk,
|
||||
linting::{ExprLinter, Lint, LintKind, Suggestion},
|
||||
patterns::NominalPhrase,
|
||||
};
|
||||
|
||||
pub struct SoonToBe {
|
||||
expr: Box<dyn Expr>,
|
||||
map: Arc<ExprMap<Range<usize>>>,
|
||||
}
|
||||
|
||||
impl Default for SoonToBe {
|
||||
fn default() -> Self {
|
||||
let mut map = ExprMap::default();
|
||||
|
||||
let soon_to_be = || {
|
||||
SequenceExpr::default()
|
||||
.t_aco("soon")
|
||||
.t_ws()
|
||||
.t_aco("to")
|
||||
.t_ws()
|
||||
.t_aco("be")
|
||||
};
|
||||
|
||||
let nominal_tail = || {
|
||||
SequenceExpr::default()
|
||||
.then_optional(SequenceExpr::default().then_one_or_more_adverbs().t_ws())
|
||||
.then(NominalPhrase)
|
||||
};
|
||||
|
||||
let hyphenated_number_modifier = || {
|
||||
SequenceExpr::default()
|
||||
.then_number()
|
||||
.then_hyphen()
|
||||
.then_nominal()
|
||||
.then_optional(SequenceExpr::default().then_hyphen().then_adjective())
|
||||
.t_ws()
|
||||
.then_nominal()
|
||||
};
|
||||
|
||||
let hyphenated_compound = || {
|
||||
SequenceExpr::default()
|
||||
.then_kind_any(&[TokenKind::is_word_like as fn(&TokenKind) -> bool])
|
||||
.then_hyphen()
|
||||
.then_nominal()
|
||||
};
|
||||
|
||||
let trailing_phrase = || {
|
||||
SequenceExpr::default().then_any_of(vec![
|
||||
Box::new(hyphenated_number_modifier()),
|
||||
Box::new(hyphenated_compound()),
|
||||
Box::new(nominal_tail()),
|
||||
])
|
||||
};
|
||||
|
||||
map.insert(
|
||||
SequenceExpr::default()
|
||||
.then_determiner()
|
||||
.t_ws()
|
||||
.then_seq(soon_to_be())
|
||||
.t_ws()
|
||||
.then_seq(trailing_phrase()),
|
||||
2..7,
|
||||
);
|
||||
|
||||
map.insert(
|
||||
SequenceExpr::default()
|
||||
.then_seq(soon_to_be())
|
||||
.t_ws()
|
||||
.then_seq(trailing_phrase()),
|
||||
0..5,
|
||||
);
|
||||
|
||||
let map = Arc::new(map);
|
||||
|
||||
Self {
|
||||
expr: Box::new(map.clone()),
|
||||
map,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprLinter for SoonToBe {
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
|
||||
let range = self.map.lookup(0, matched_tokens, source)?;
|
||||
let span = matched_tokens.get(range.start..range.end)?.span()?;
|
||||
let template = span.get_content(source);
|
||||
|
||||
let mut nominal_found = false;
|
||||
for tok in matched_tokens.iter().skip(range.end) {
|
||||
if tok.kind.is_whitespace() || tok.kind.is_hyphen() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if tok.kind.is_punctuation() {
|
||||
break;
|
||||
}
|
||||
|
||||
if tok.kind.is_nominal() {
|
||||
if tok.kind.is_preposition() {
|
||||
continue;
|
||||
} else {
|
||||
nominal_found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !nominal_found {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Lint {
|
||||
span,
|
||||
lint_kind: LintKind::Miscellaneous,
|
||||
suggestions: vec![Suggestion::replace_with_match_case_str(
|
||||
"soon-to-be",
|
||||
template,
|
||||
)],
|
||||
message: "Use hyphens when `soon to be` modifies a noun.".to_owned(),
|
||||
priority: 31,
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Hyphenates `soon-to-be` when it appears before a noun."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::SoonToBe;
|
||||
use crate::linting::tests::{assert_lint_count, assert_no_lints, assert_suggestion_result};
|
||||
|
||||
#[test]
|
||||
fn hyphenates_possessive_phrase() {
|
||||
assert_suggestion_result(
|
||||
"We met his soon to be boss at lunch.",
|
||||
SoonToBe::default(),
|
||||
"We met his soon-to-be boss at lunch.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hyphenates_article_phrase() {
|
||||
assert_suggestion_result(
|
||||
"They toasted the soon to be couple.",
|
||||
SoonToBe::default(),
|
||||
"They toasted the soon-to-be couple.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hyphenates_sentence_start() {
|
||||
assert_suggestion_result(
|
||||
"Soon to be parents filled the classroom.",
|
||||
SoonToBe::default(),
|
||||
"Soon-to-be parents filled the classroom.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allows_existing_hyphens() {
|
||||
assert_no_lints("We met his soon-to-be boss yesterday.", SoonToBe::default());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn keeps_non_adjectival_use() {
|
||||
assert_no_lints("The concert is soon to be over.", SoonToBe::default());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hyphenates_with_adverb() {
|
||||
assert_suggestion_result(
|
||||
"Our soon to be newly married friends visited.",
|
||||
SoonToBe::default(),
|
||||
"Our soon-to-be newly married friends visited.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hyphenates_hyphenated_number_phrase() {
|
||||
assert_suggestion_result(
|
||||
"Our soon to be 5-year-old son starts school.",
|
||||
SoonToBe::default(),
|
||||
"Our soon-to-be 5-year-old son starts school.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hyphenates_in_law_phrase() {
|
||||
assert_suggestion_result(
|
||||
"She thanked her soon to be in-laws for hosting.",
|
||||
SoonToBe::default(),
|
||||
"She thanked her soon-to-be in-laws for hosting.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hyphenates_future_event() {
|
||||
assert_suggestion_result(
|
||||
"We reserved space for our soon to be celebration.",
|
||||
SoonToBe::default(),
|
||||
"We reserved space for our soon-to-be celebration.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignores_misaligned_verb_chain() {
|
||||
assert_lint_count(
|
||||
"They will soon to be moving overseas.",
|
||||
SoonToBe::default(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hyphenates_guest_example() {
|
||||
assert_suggestion_result(
|
||||
"I cooked for my soon to be guests.",
|
||||
SoonToBe::default(),
|
||||
"I cooked for my soon-to-be guests.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignores_rearranged_phrase() {
|
||||
assert_no_lints("We hope to soon be home.", SoonToBe::default());
|
||||
}
|
||||
}
|
||||
|
|
@ -480,4 +480,12 @@ mod tests {
|
|||
"'fill' is supposed to be 'fill'",
|
||||
);
|
||||
}
|
||||
#[test]
|
||||
fn issue_2261() {
|
||||
assert_top3_suggestion_result(
|
||||
"Generaly",
|
||||
SpellCheck::new(FstDictionary::curated(), Dialect::British),
|
||||
"Generally",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
242
harper-core/src/linting/take_medicine.rs
Normal file
242
harper-core/src/linting/take_medicine.rs
Normal file
|
|
@ -0,0 +1,242 @@
|
|||
use crate::{
|
||||
Token,
|
||||
expr::{Expr, OwnedExprExt, SequenceExpr},
|
||||
linting::expr_linter::Chunk,
|
||||
linting::{ExprLinter, Lint, LintKind, Suggestion},
|
||||
patterns::DerivedFrom,
|
||||
};
|
||||
|
||||
pub struct TakeMedicine {
|
||||
expr: Box<dyn Expr>,
|
||||
}
|
||||
|
||||
impl Default for TakeMedicine {
|
||||
fn default() -> Self {
|
||||
let eat_verb = DerivedFrom::new_from_str("eat")
|
||||
.or(DerivedFrom::new_from_str("eats"))
|
||||
.or(DerivedFrom::new_from_str("ate"))
|
||||
.or(DerivedFrom::new_from_str("eating"))
|
||||
.or(DerivedFrom::new_from_str("eaten"));
|
||||
|
||||
let medication = DerivedFrom::new_from_str("antibiotic")
|
||||
.or(DerivedFrom::new_from_str("medicine"))
|
||||
.or(DerivedFrom::new_from_str("medication"))
|
||||
.or(DerivedFrom::new_from_str("pill"))
|
||||
.or(DerivedFrom::new_from_str("tablet"))
|
||||
.or(DerivedFrom::new_from_str("aspirin"))
|
||||
.or(DerivedFrom::new_from_str("paracetamol"));
|
||||
|
||||
let modifiers = SequenceExpr::default()
|
||||
.then_any_of(vec![
|
||||
Box::new(SequenceExpr::default().then_determiner()),
|
||||
Box::new(SequenceExpr::default().then_possessive_determiner()),
|
||||
Box::new(SequenceExpr::default().then_quantifier()),
|
||||
])
|
||||
.t_ws();
|
||||
|
||||
let adjectives = SequenceExpr::default().then_one_or_more_adjectives().t_ws();
|
||||
|
||||
let pattern = SequenceExpr::default()
|
||||
.then(eat_verb)
|
||||
.t_ws()
|
||||
.then_optional(modifiers)
|
||||
.then_optional(adjectives)
|
||||
.then(medication);
|
||||
|
||||
Self {
|
||||
expr: Box::new(pattern),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn replacement_for(
|
||||
verb: &Token,
|
||||
source: &[char],
|
||||
base: &str,
|
||||
third_person: &str,
|
||||
past: &str,
|
||||
past_participle: &str,
|
||||
progressive: &str,
|
||||
) -> Suggestion {
|
||||
let replacement = if verb.kind.is_verb_progressive_form() {
|
||||
progressive
|
||||
} else if verb.kind.is_verb_third_person_singular_present_form() {
|
||||
third_person
|
||||
} else if verb.kind.is_verb_past_participle_form() && !verb.kind.is_verb_simple_past_form() {
|
||||
past_participle
|
||||
} else if verb.kind.is_verb_simple_past_form() {
|
||||
past
|
||||
} else {
|
||||
base
|
||||
};
|
||||
|
||||
Suggestion::replace_with_match_case(
|
||||
replacement.chars().collect(),
|
||||
verb.span.get_content(source),
|
||||
)
|
||||
}
|
||||
|
||||
impl ExprLinter for TakeMedicine {
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
|
||||
let verb = matched_tokens.first()?;
|
||||
let span = verb.span;
|
||||
|
||||
let suggestions = vec![
|
||||
replacement_for(verb, source, "take", "takes", "took", "taken", "taking"),
|
||||
replacement_for(
|
||||
verb,
|
||||
source,
|
||||
"swallow",
|
||||
"swallows",
|
||||
"swallowed",
|
||||
"swallowed",
|
||||
"swallowing",
|
||||
),
|
||||
];
|
||||
|
||||
Some(Lint {
|
||||
span,
|
||||
lint_kind: LintKind::Usage,
|
||||
suggestions,
|
||||
message: "Use a verb like `take` or `swallow` with medicine instead of `eat`."
|
||||
.to_string(),
|
||||
priority: 63,
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Encourages pairing medicine-related nouns with verbs like `take` or `swallow` instead of `eat`."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::TakeMedicine;
|
||||
use crate::linting::tests::{
|
||||
assert_lint_count, assert_nth_suggestion_result, assert_suggestion_result,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn swaps_ate_antibiotics() {
|
||||
assert_suggestion_result(
|
||||
"I ate antibiotics for a week.",
|
||||
TakeMedicine::default(),
|
||||
"I took antibiotics for a week.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn swaps_eat_medicine() {
|
||||
assert_suggestion_result(
|
||||
"You should eat the medicine now.",
|
||||
TakeMedicine::default(),
|
||||
"You should take the medicine now.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn swaps_eats_medication() {
|
||||
assert_suggestion_result(
|
||||
"She eats medication daily.",
|
||||
TakeMedicine::default(),
|
||||
"She takes medication daily.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn swaps_eating_medicines() {
|
||||
assert_suggestion_result(
|
||||
"Are you eating medicines for that illness?",
|
||||
TakeMedicine::default(),
|
||||
"Are you taking medicines for that illness?",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn swaps_eaten_medication() {
|
||||
assert_suggestion_result(
|
||||
"He has eaten medication already.",
|
||||
TakeMedicine::default(),
|
||||
"He has taken medication already.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn swaps_eat_pills() {
|
||||
assert_suggestion_result(
|
||||
"He ate the pills without water.",
|
||||
TakeMedicine::default(),
|
||||
"He took the pills without water.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn swaps_eating_paracetamol() {
|
||||
assert_suggestion_result(
|
||||
"She is eating paracetamol for her headache.",
|
||||
TakeMedicine::default(),
|
||||
"She is taking paracetamol for her headache.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_possessive_modifier() {
|
||||
assert_suggestion_result(
|
||||
"Please eat my antibiotics.",
|
||||
TakeMedicine::default(),
|
||||
"Please take my antibiotics.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_adjectives() {
|
||||
assert_suggestion_result(
|
||||
"They ate the prescribed antibiotics.",
|
||||
TakeMedicine::default(),
|
||||
"They took the prescribed antibiotics.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn supports_uppercase() {
|
||||
assert_suggestion_result(
|
||||
"Eat antibiotics with water.",
|
||||
TakeMedicine::default(),
|
||||
"Take antibiotics with water.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn offers_swallow_alternative() {
|
||||
assert_nth_suggestion_result(
|
||||
"He ate the medication without water.",
|
||||
TakeMedicine::default(),
|
||||
"He swallowed the medication without water.",
|
||||
1,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignores_correct_usage() {
|
||||
assert_lint_count(
|
||||
"She took antibiotics last winter.",
|
||||
TakeMedicine::default(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignores_unrelated_eat() {
|
||||
assert_lint_count(
|
||||
"They ate dinner after taking medicine.",
|
||||
TakeMedicine::default(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
use crate::linting::expr_linter::Chunk;
|
||||
use crate::{
|
||||
CharStringExt, Token,
|
||||
Token, TokenKind,
|
||||
expr::SequenceExpr,
|
||||
linting::{ExprLinter, Lint, LintKind, Suggestion},
|
||||
};
|
||||
|
|
@ -11,14 +11,10 @@ pub struct Theres {
|
|||
|
||||
impl Default for Theres {
|
||||
fn default() -> Self {
|
||||
let expr = SequenceExpr::aco("their's")
|
||||
.t_ws()
|
||||
.then(|tok: &Token, src: &[char]| {
|
||||
tok.kind.is_determiner()
|
||||
|| tok.kind.is_quantifier()
|
||||
|| tok.span.get_content(src).eq_ignore_ascii_case_str("no")
|
||||
|| tok.span.get_content(src).eq_ignore_ascii_case_str("enough")
|
||||
});
|
||||
let expr = SequenceExpr::aco("their's").t_ws().then_kind_any_or_words(
|
||||
&[TokenKind::is_determiner, TokenKind::is_quantifier] as &[_],
|
||||
&["no", "enough"],
|
||||
);
|
||||
|
||||
Self {
|
||||
expr: Box::new(expr),
|
||||
|
|
|
|||
|
|
@ -1,267 +0,0 @@
|
|||
use crate::char_string::CharStringExt;
|
||||
use crate::patterns::WhitespacePattern;
|
||||
use crate::{
|
||||
Token, TokenKind,
|
||||
expr::{AnchorEnd, AnchorStart, Expr, FirstMatchOf, SequenceExpr},
|
||||
};
|
||||
|
||||
use super::{ExprLinter, Lint, LintKind, Suggestion};
|
||||
|
||||
pub struct ToToo {
|
||||
expr: Box<dyn Expr>,
|
||||
}
|
||||
|
||||
impl Default for ToToo {
|
||||
fn default() -> Self {
|
||||
// to + adjective (but not also a verb), allowing optional following word/punct.
|
||||
// Decision about linting is refined in `match_to_lint` to avoid false positives.
|
||||
let to_before_adjective_loose = SequenceExpr::default()
|
||||
.t_aco("to")
|
||||
.t_ws()
|
||||
.then_kind_is_but_is_not_except(
|
||||
TokenKind::is_adjective,
|
||||
TokenKind::is_verb,
|
||||
&["standard"],
|
||||
)
|
||||
.then_optional(WhitespacePattern)
|
||||
.then_optional(SequenceExpr::default().then_any_word())
|
||||
.then_optional(WhitespacePattern)
|
||||
.then_optional(SequenceExpr::default().then_punctuation());
|
||||
|
||||
// to + adverb + (punct | end)
|
||||
// to + adverb + (punct | end)
|
||||
let to_before_adverb = SequenceExpr::default()
|
||||
.t_aco("to")
|
||||
.t_ws()
|
||||
.then_kind_is_but_is_not_except(
|
||||
TokenKind::is_adverb,
|
||||
|_| false,
|
||||
&["as"],
|
||||
)
|
||||
.then_optional(WhitespacePattern)
|
||||
.then_any_of(vec![
|
||||
Box::new(SequenceExpr::default().then_kind_is_but_is_not_except(
|
||||
TokenKind::is_punctuation,
|
||||
|_| false,
|
||||
&["`", "\"", "'", "“", "”", "‘", "’"],
|
||||
)),
|
||||
Box::new(SequenceExpr::default().then_unless(SequenceExpr::default().t_any())),
|
||||
]);
|
||||
|
||||
// to + adjective-verb (past participle like "tired") + punctuation (non-quote, non-dash)
|
||||
// Helps catch cases like "to tired." while avoiding base verbs like "to dominate."
|
||||
let to_before_adj_verb_ed_punct = SequenceExpr::default()
|
||||
.t_aco("to")
|
||||
.t_ws()
|
||||
.then(|tok: &Token, src: &[char]| {
|
||||
tok.kind.is_adjective()
|
||||
&& tok.kind.is_verb()
|
||||
&& !tok.kind.is_noun()
|
||||
&& tok
|
||||
.span
|
||||
.get_content(src)
|
||||
.iter()
|
||||
.collect::<String>()
|
||||
.to_lowercase()
|
||||
.ends_with("ed")
|
||||
})
|
||||
.then_optional(WhitespacePattern)
|
||||
.then_kind_is_but_is_not_except(
|
||||
TokenKind::is_punctuation,
|
||||
|_| false,
|
||||
&["`", "\"", "'", "“", "”", "‘", "’", "-", "–", "—"],
|
||||
);
|
||||
|
||||
// to + adjective (any, including words that can also be verbs) + punctuation (non-quote, non-dash)
|
||||
let to_before_adjective_strict_punct = SequenceExpr::default()
|
||||
.t_aco("to")
|
||||
.t_ws()
|
||||
.then_kind_is_but_is_not_except(
|
||||
TokenKind::is_adjective,
|
||||
TokenKind::is_verb,
|
||||
&["standard"],
|
||||
)
|
||||
.then_optional(WhitespacePattern)
|
||||
.then_kind_is_but_is_not_except(
|
||||
TokenKind::is_punctuation,
|
||||
|_| false,
|
||||
&["`", "\"", "'", "“", "”", "‘", "’", "-", "–", "—"],
|
||||
);
|
||||
|
||||
// to + (many|much|few) + (punct|end) to avoid "connected to many ..."
|
||||
let to_before_degree_words = SequenceExpr::default()
|
||||
.t_aco("to")
|
||||
.t_ws()
|
||||
.then_word_set(&["many", "much", "few"])
|
||||
.t_ws_opt()
|
||||
.then_any_of(vec![
|
||||
Box::new(SequenceExpr::default().then_kind_is_but_is_not_except(
|
||||
TokenKind::is_punctuation,
|
||||
|_| false,
|
||||
&["`", "\"", "'", "“", "”", "‘", "’", "-", "–", "—"],
|
||||
)),
|
||||
Box::new(AnchorEnd),
|
||||
]);
|
||||
|
||||
let chunk_start_to_pause = SequenceExpr::default()
|
||||
.then(AnchorStart)
|
||||
.t_aco("to")
|
||||
.then_optional(WhitespacePattern)
|
||||
.then_comma();
|
||||
|
||||
// (start|punct) + pronoun + to + (punct_without_quotes | end)
|
||||
let pronoun_to_end = SequenceExpr::default()
|
||||
.then_any_of(vec![
|
||||
Box::new(SequenceExpr::default().then_anchor_start()),
|
||||
Box::new(
|
||||
SequenceExpr::default()
|
||||
.then_kind_is_but_is_not_except(
|
||||
TokenKind::is_punctuation,
|
||||
|_| false,
|
||||
&["`", "\"", "'", "“", "”", "‘", "’"],
|
||||
)
|
||||
.t_ws_opt(),
|
||||
),
|
||||
])
|
||||
.then_pronoun()
|
||||
.t_ws()
|
||||
.t_aco("to")
|
||||
.then_any_of(vec![
|
||||
Box::new(SequenceExpr::default().then_kind_is_but_is_not_except(
|
||||
TokenKind::is_punctuation,
|
||||
|_| false,
|
||||
&["`", "\"", "'", "“", "”", "‘", "’"],
|
||||
)),
|
||||
Box::new(AnchorEnd),
|
||||
]);
|
||||
|
||||
let expr = FirstMatchOf::new(vec![
|
||||
Box::new(to_before_adj_verb_ed_punct),
|
||||
Box::new(to_before_adjective_strict_punct),
|
||||
Box::new(to_before_adverb),
|
||||
Box::new(to_before_degree_words),
|
||||
Box::new(pronoun_to_end),
|
||||
Box::new(chunk_start_to_pause),
|
||||
Box::new(to_before_adjective_loose),
|
||||
]);
|
||||
|
||||
Self {
|
||||
expr: Box::new(expr),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprLinter for ToToo {
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, tokens: &[Token], source: &[char]) -> Option<Lint> {
|
||||
// The expression ensures only valid contexts reach here.
|
||||
let to_tok = tokens.iter().find(|t| {
|
||||
t.span
|
||||
.get_content(source)
|
||||
.eq_ignore_ascii_case_chars(&['t', 'o'])
|
||||
})?;
|
||||
|
||||
// Decide if this match should lint based on the token following `to`.
|
||||
// Find the next non-whitespace token after `to` (if any)
|
||||
let to_index = tokens
|
||||
.iter()
|
||||
.position(|t| {
|
||||
t.span
|
||||
.get_content(source)
|
||||
.eq_ignore_ascii_case_chars(&['t', 'o'])
|
||||
})
|
||||
.unwrap_or(0);
|
||||
|
||||
// Find index of the first non-whitespace token after `to`
|
||||
let mut idx = to_index + 1;
|
||||
while idx < tokens.len() && tokens[idx].kind.is_whitespace() {
|
||||
idx += 1;
|
||||
}
|
||||
|
||||
let should_lint = if idx < tokens.len() {
|
||||
let next = &tokens[idx];
|
||||
let next_text: String = next.span.get_content(source).iter().collect();
|
||||
let next_lower = next_text.to_lowercase();
|
||||
// Find token after `next` ignoring whitespace, if any
|
||||
let mut j = idx + 1;
|
||||
while j < tokens.len() && tokens[j].kind.is_whitespace() {
|
||||
j += 1;
|
||||
}
|
||||
let after_next_non_ws = if j < tokens.len() {
|
||||
Some(&tokens[j])
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Branch: degree words
|
||||
if matches!(next_lower.as_str(), "many" | "much" | "few") {
|
||||
true
|
||||
// Branch: punctuation or end after pronoun (", to.", "Me to!")
|
||||
} else if next.kind.is_punctuation() {
|
||||
true
|
||||
// Branch: adverb
|
||||
} else if next.kind.is_adverb() {
|
||||
// Only when followed by non-quote, non-dash punctuation or end-of-slice
|
||||
match after_next_non_ws {
|
||||
None => true,
|
||||
Some(t) => {
|
||||
if t.kind.is_punctuation() {
|
||||
let punct: String = t.span.get_content(source).iter().collect();
|
||||
!matches!(
|
||||
punct.as_str(),
|
||||
"`" | "\"" | "'" | "“" | "”" | "‘" | "’" | "-" | "–" | "—"
|
||||
)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
// Branch: adjective
|
||||
} else if next.kind.is_adjective() {
|
||||
// Avoid specific exception (commonly prepositional phrase)
|
||||
if next_lower == "standard" {
|
||||
return None;
|
||||
}
|
||||
match after_next_non_ws {
|
||||
None => true, // end-of-slice (no following token captured)
|
||||
Some(t) if t.kind.is_punctuation() => {
|
||||
let punct: String = t.span.get_content(source).iter().collect();
|
||||
!matches!(
|
||||
punct.as_str(),
|
||||
"`" | "\"" | "'" | "“" | "”" | "‘" | "’" | "-" | "–" | "—"
|
||||
)
|
||||
}
|
||||
// If a word follows, do not lint (likely "to ADJ NOUN" prepositional phrase)
|
||||
_ => false,
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
// No token after `to` (end of chunk) — don't lint.
|
||||
false
|
||||
};
|
||||
|
||||
if !should_lint {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Lint {
|
||||
span: to_tok.span,
|
||||
lint_kind: LintKind::WordChoice,
|
||||
suggestions: vec![Suggestion::replace_with_match_case_str(
|
||||
"too",
|
||||
to_tok.span.get_content(source),
|
||||
)],
|
||||
message: "Use `too` here to mean ‘also’ or an excessive degree.".to_string(),
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Corrects mistaken `to` to `too` when it means ‘also’ or an excessive degree."
|
||||
}
|
||||
}
|
||||
|
|
@ -23,10 +23,7 @@ impl Default for ToTooAdjVerbEdPunct {
|
|||
&& tok
|
||||
.span
|
||||
.get_content(src)
|
||||
.iter()
|
||||
.collect::<String>()
|
||||
.to_lowercase()
|
||||
.ends_with("ed")
|
||||
.ends_with_ignore_ascii_case_chars(&['e', 'd'])
|
||||
})
|
||||
.then_sentence_terminator();
|
||||
|
||||
|
|
|
|||
181
harper-core/src/linting/transposed_space.rs
Normal file
181
harper-core/src/linting/transposed_space.rs
Normal file
|
|
@ -0,0 +1,181 @@
|
|||
use crate::{
|
||||
Lint, Token, TokenStringExt,
|
||||
expr::{Expr, FirstMatchOf, SequenceExpr},
|
||||
linting::{ExprLinter, LintKind, Suggestion, expr_linter::Chunk},
|
||||
spell::Dictionary,
|
||||
};
|
||||
|
||||
pub struct TransposedSpace<D: Dictionary + 'static> {
|
||||
expr: Box<dyn Expr>,
|
||||
dict: D,
|
||||
}
|
||||
|
||||
impl<D: Dictionary + 'static> TransposedSpace<D> {
|
||||
pub fn new(dict: D) -> Self {
|
||||
Self {
|
||||
expr: Box::new(FirstMatchOf::new(vec![Box::new(
|
||||
SequenceExpr::default().then_oov().t_ws().then_oov(),
|
||||
)])),
|
||||
dict,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn sensitive(dict: D) -> Self {
|
||||
Self {
|
||||
expr: Box::new(FirstMatchOf::new(vec![
|
||||
Box::new(SequenceExpr::default().then_oov().t_ws().then_any_word()),
|
||||
Box::new(SequenceExpr::default().then_any_word().t_ws().then_oov()),
|
||||
Box::new(SequenceExpr::default().then_oov().t_ws().then_oov()),
|
||||
])),
|
||||
dict,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn keep_unique(values: &mut Vec<String>, word1: &[char], word2: &[char]) {
|
||||
let value = format!(
|
||||
"{} {}",
|
||||
word1.iter().collect::<String>(),
|
||||
word2.iter().collect::<String>()
|
||||
);
|
||||
if !values.contains(&value) {
|
||||
values.push(value);
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: Dictionary + 'static> ExprLinter for TransposedSpace<D> {
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, toks: &[Token], src: &[char]) -> Option<Lint> {
|
||||
let toks_span = toks.span()?;
|
||||
|
||||
// "thec" "at" / "th ecat"
|
||||
let word1 = toks.first()?.span.get_content(src);
|
||||
let word2 = toks.last()?.span.get_content(src);
|
||||
|
||||
// "thec" -> "the c"
|
||||
let w1_start = &word1[..word1.len() - 1];
|
||||
let w1_last = word1.iter().last()?;
|
||||
|
||||
// "ecat" -> "e cat"
|
||||
let w2_first = word2.first()?;
|
||||
let w2_end = &word2[1..];
|
||||
|
||||
// "c" + "at" -> "cat"
|
||||
let mut w1_last_plus_w2 = word2.to_vec();
|
||||
w1_last_plus_w2.insert(0, *w1_last);
|
||||
|
||||
// "th" + "e" -> "the"
|
||||
let mut w1_plus_w2_first = word1.to_vec();
|
||||
w1_plus_w2_first.push(*w2_first);
|
||||
|
||||
let mut values = vec![];
|
||||
|
||||
// "thec" "at" -> "the cat"
|
||||
if self.dict.contains_word(w1_start) && self.dict.contains_word(&w1_last_plus_w2) {
|
||||
let maybe_canon_w2 = self.dict.get_correct_capitalization_of(&w1_last_plus_w2);
|
||||
if let Some(canon_w1) = self.dict.get_correct_capitalization_of(w1_start) {
|
||||
if let Some(canon_w2) = maybe_canon_w2 {
|
||||
keep_unique(&mut values, canon_w1, canon_w2);
|
||||
} else {
|
||||
keep_unique(&mut values, canon_w1, &w1_last_plus_w2);
|
||||
}
|
||||
} else if let Some(canon_w2) = maybe_canon_w2 {
|
||||
keep_unique(&mut values, w1_start, canon_w2);
|
||||
}
|
||||
|
||||
keep_unique(&mut values, w1_start, &w1_last_plus_w2);
|
||||
}
|
||||
|
||||
// "th" "ecat" -> "the cat"
|
||||
if self.dict.contains_word(&w1_plus_w2_first) && self.dict.contains_word(w2_end) {
|
||||
let maybe_canon_w2 = self.dict.get_correct_capitalization_of(w2_end);
|
||||
if let Some(canon_w1) = self.dict.get_correct_capitalization_of(&w1_plus_w2_first) {
|
||||
if let Some(canon_w2) = maybe_canon_w2 {
|
||||
keep_unique(&mut values, canon_w1, canon_w2);
|
||||
} else {
|
||||
keep_unique(&mut values, canon_w1, w2_end);
|
||||
}
|
||||
} else if let Some(canon_w2) = maybe_canon_w2 {
|
||||
keep_unique(&mut values, &w1_plus_w2_first, canon_w2);
|
||||
}
|
||||
|
||||
keep_unique(&mut values, &w1_plus_w2_first, w2_end);
|
||||
}
|
||||
|
||||
if values.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let suggestions = values
|
||||
.iter()
|
||||
.map(|value| {
|
||||
Suggestion::replace_with_match_case(
|
||||
value.chars().collect(),
|
||||
toks_span.get_content(src),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
Some(Lint {
|
||||
span: toks_span,
|
||||
lint_kind: LintKind::Typo,
|
||||
suggestions,
|
||||
message: format!(
|
||||
"Is the space between `{}` and `{}` one character out of place?",
|
||||
word1.iter().collect::<String>(),
|
||||
word2.iter().collect::<String>()
|
||||
),
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Looks for a space one character too early or too late between words."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::TransposedSpace;
|
||||
use crate::{linting::tests::assert_suggestion_result, spell::FstDictionary};
|
||||
|
||||
#[test]
|
||||
fn space_too_early() {
|
||||
assert_suggestion_result(
|
||||
"Th ecat sat on the mat.",
|
||||
TransposedSpace::sensitive(FstDictionary::curated()),
|
||||
"The cat sat on the mat.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn space_too_late() {
|
||||
assert_suggestion_result(
|
||||
"Thec at sat on the mat.",
|
||||
TransposedSpace::sensitive(FstDictionary::curated()),
|
||||
"The cat sat on the mat.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_early() {
|
||||
assert_suggestion_result(
|
||||
"Sometimes the spac eis one character early.",
|
||||
TransposedSpace::new(FstDictionary::curated()),
|
||||
"Sometimes the space is one character early.",
|
||||
);
|
||||
}
|
||||
#[test]
|
||||
fn test_late() {
|
||||
assert_suggestion_result(
|
||||
"Ands ometimes the space is a character late.",
|
||||
TransposedSpace::new(FstDictionary::curated()),
|
||||
"And sometimes the space is a character late.",
|
||||
);
|
||||
}
|
||||
}
|
||||
69
harper-core/src/linting/use_title_case.rs
Normal file
69
harper-core/src/linting/use_title_case.rs
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
use crate::{Document, TokenStringExt, spell::Dictionary, title_case::try_make_title_case};
|
||||
|
||||
use super::{Lint, LintKind, Linter, Suggestion};
|
||||
|
||||
pub struct UseTitleCase<D: Dictionary + 'static> {
|
||||
dict: D,
|
||||
}
|
||||
|
||||
impl<D: Dictionary + 'static> UseTitleCase<D> {
|
||||
pub fn new(dict: D) -> Self {
|
||||
Self { dict }
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: Dictionary + 'static> Linter for UseTitleCase<D> {
|
||||
fn lint(&mut self, document: &Document) -> Vec<Lint> {
|
||||
let mut lints = Vec::new();
|
||||
|
||||
for heading in document.iter_headings() {
|
||||
let Some(span) = heading.span() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
if let Some(title_case) =
|
||||
try_make_title_case(heading, document.get_source(), &self.dict)
|
||||
{
|
||||
lints.push(Lint {
|
||||
span,
|
||||
lint_kind: LintKind::Capitalization,
|
||||
suggestions: vec![Suggestion::ReplaceWith(title_case)],
|
||||
message: "Try to use title case in headings.".to_owned(),
|
||||
priority: 127,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
lints
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Prompts you to use title case in relevant headings."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::linting::tests::assert_suggestion_result;
|
||||
use crate::spell::FstDictionary;
|
||||
|
||||
use super::UseTitleCase;
|
||||
|
||||
#[test]
|
||||
fn simple_correction() {
|
||||
assert_suggestion_result(
|
||||
"# This is a title",
|
||||
UseTitleCase::new(FstDictionary::curated()),
|
||||
"# This Is a Title",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn double_correction() {
|
||||
assert_suggestion_result(
|
||||
"# This is a title\n\n## This is a subtitle",
|
||||
UseTitleCase::new(FstDictionary::curated()),
|
||||
"# This Is a Title\n\n## This Is a Subtitle",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -19,18 +19,17 @@ impl Default for VerbToAdjective {
|
|||
let expr = SequenceExpr::default()
|
||||
.then(WordSet::new(&["the", "a", "an"]))
|
||||
.t_ws()
|
||||
.then(|tok: &Token, _: &[char]| {
|
||||
(tok.kind.is_verb()
|
||||
&& !tok.kind.is_verb_past_form()
|
||||
&& !tok.kind.is_adjective()
|
||||
&& !tok.kind.is_noun())
|
||||
|| tok.kind.is_degree_adverb()
|
||||
.then_kind_where(|kind| {
|
||||
(kind.is_verb()
|
||||
&& !kind.is_verb_past_form()
|
||||
&& !kind.is_adjective()
|
||||
&& !kind.is_noun())
|
||||
|| kind.is_degree_adverb()
|
||||
})
|
||||
.t_ws()
|
||||
.then(UPOSSet::new(&[UPOS::NOUN, UPOS::PROPN]));
|
||||
|
||||
let exceptions = SequenceExpr::default()
|
||||
.t_any()
|
||||
let exceptions = SequenceExpr::anything()
|
||||
.t_any()
|
||||
.then_unless(WordSet::new(&["very"]));
|
||||
|
||||
|
|
|
|||
|
|
@ -20,8 +20,7 @@ impl Default for WayTooAdjective {
|
|||
.t_ws()
|
||||
.then(UPOSSet::new(&[UPOS::ADJ]).or(WordSet::new(&["much"])));
|
||||
|
||||
let exceptions = SequenceExpr::default()
|
||||
.t_any()
|
||||
let exceptions = SequenceExpr::anything()
|
||||
.t_any()
|
||||
.t_any()
|
||||
.t_any()
|
||||
|
|
|
|||
167
harper-core/src/linting/wish_could.rs
Normal file
167
harper-core/src/linting/wish_could.rs
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
use super::{Lint, LintKind, Suggestion};
|
||||
use crate::Token;
|
||||
use crate::expr::{Expr, SequenceExpr};
|
||||
use crate::linting::{ExprLinter, expr_linter::Chunk};
|
||||
|
||||
pub struct WishCould {
|
||||
expr: Box<dyn Expr>,
|
||||
}
|
||||
|
||||
impl Default for WishCould {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
expr: Box::new(
|
||||
SequenceExpr::word_set(&["wish", "wished", "wishes", "wishing"])
|
||||
.t_ws()
|
||||
.then_any_of(vec![
|
||||
Box::new(SequenceExpr::default().then_subject_pronoun()),
|
||||
Box::new(SequenceExpr::word_set(&[
|
||||
// Elective existential indefinite pronouns
|
||||
"anybody",
|
||||
"anyone",
|
||||
// Universal indefinite pronouns
|
||||
"everybody",
|
||||
"everyone",
|
||||
// Negative indefinite pronouns (correct)
|
||||
"nobody",
|
||||
// Negative indefinite pronouns (incorrect)
|
||||
"noone",
|
||||
// Assertive existential indefinite pronouns
|
||||
"somebody",
|
||||
"someone",
|
||||
// Demonstrative pronouns
|
||||
"these",
|
||||
"this",
|
||||
"those",
|
||||
])),
|
||||
])
|
||||
.t_ws()
|
||||
.t_aco("can"),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprLinter for WishCould {
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
&*self.expr
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, toks: &[Token], src: &[char]) -> Option<Lint> {
|
||||
let can_tok = toks.last()?;
|
||||
let can_span = can_tok.span;
|
||||
|
||||
Some(Lint {
|
||||
span: can_span,
|
||||
lint_kind: LintKind::Grammar,
|
||||
suggestions: vec![Suggestion::replace_with_match_case_str(
|
||||
"could",
|
||||
can_span.get_content(src),
|
||||
)],
|
||||
message: "Use 'could' instead of 'can' after 'wish'.".to_string(),
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Checks for `can` being used after `wish` when it should be `could`."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::linting::tests::assert_suggestion_result;
|
||||
|
||||
#[test]
|
||||
fn flag_wish_we_can() {
|
||||
assert_suggestion_result(
|
||||
"i wish we can spend more time together",
|
||||
WishCould::default(),
|
||||
"i wish we could spend more time together",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flag_wish_i_can() {
|
||||
assert_suggestion_result(
|
||||
"I wish I can finally forgive myself for all the things I am not.",
|
||||
WishCould::default(),
|
||||
"I wish I could finally forgive myself for all the things I am not.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flag_wish_you_can() {
|
||||
assert_suggestion_result(
|
||||
"I wish you can find your true love.",
|
||||
WishCould::default(),
|
||||
"I wish you could find your true love.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flag_wishes_they_can() {
|
||||
assert_suggestion_result(
|
||||
"What your Therapist wishes they can tell you.",
|
||||
WishCould::default(),
|
||||
"What your Therapist wishes they could tell you.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flag_wishing_someone_can() {
|
||||
assert_suggestion_result(
|
||||
"Forever wishing someone can point me in the right direction",
|
||||
WishCould::default(),
|
||||
"Forever wishing someone could point me in the right direction",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flag_wish_they_can() {
|
||||
assert_suggestion_result(
|
||||
"I wish they can plant more trees on this road.",
|
||||
WishCould::default(),
|
||||
"I wish they could plant more trees on this road.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flag_wished_he_can() {
|
||||
assert_suggestion_result(
|
||||
"I just wished he can talk and tell me how he feels",
|
||||
WishCould::default(),
|
||||
"I just wished he could talk and tell me how he feels",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn wish_this_can() {
|
||||
assert_suggestion_result(
|
||||
"but I wish this can be fixed by Electron team",
|
||||
WishCould::default(),
|
||||
"but I wish this could be fixed by Electron team",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn wish_it_can() {
|
||||
assert_suggestion_result(
|
||||
"Wish it can be supported.",
|
||||
WishCould::default(),
|
||||
"Wish it could be supported.",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn wish_somebody_can() {
|
||||
assert_suggestion_result(
|
||||
"I wish somebody can fix this issue.",
|
||||
WishCould::default(),
|
||||
"I wish somebody could fix this issue.",
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
@ -204,7 +204,16 @@ impl Parser for Markdown {
|
|||
});
|
||||
stack.push(pulldown_cmark::Tag::List(v));
|
||||
}
|
||||
pulldown_cmark::Event::Start(tag) => stack.push(tag),
|
||||
pulldown_cmark::Event::Start(tag) => {
|
||||
if matches!(tag, pulldown_cmark::Tag::Heading { .. }) {
|
||||
tokens.push(Token {
|
||||
span: Span::new_with_len(span_start, 0),
|
||||
kind: TokenKind::HeadingStart,
|
||||
});
|
||||
}
|
||||
|
||||
stack.push(tag)
|
||||
}
|
||||
pulldown_cmark::Event::End(pulldown_cmark::TagEnd::Paragraph)
|
||||
| pulldown_cmark::Event::End(pulldown_cmark::TagEnd::Item)
|
||||
| pulldown_cmark::Event::End(pulldown_cmark::TagEnd::Heading(_))
|
||||
|
|
@ -575,4 +584,28 @@ Paragraph.
|
|||
let parser = Markdown::new(opts);
|
||||
let _res = parser.parse_str("//{@j");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_headings_are_marked() {
|
||||
let opts = MarkdownOptions::default();
|
||||
let parser = Markdown::new(opts);
|
||||
let tokens = parser.parse_str("# This is a simple heading");
|
||||
|
||||
assert_eq!(tokens.iter_heading_starts().count(), 1);
|
||||
assert_eq!(tokens.iter_headings().count(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multiple_headings_are_marked() {
|
||||
let opts = MarkdownOptions::default();
|
||||
let parser = Markdown::new(opts);
|
||||
let tokens = parser.parse_str(
|
||||
r#"# This is a simple heading
|
||||
|
||||
## This is a second simple heading"#,
|
||||
);
|
||||
|
||||
assert_eq!(tokens.iter_heading_starts().count(), 2);
|
||||
assert_eq!(tokens.iter_headings().count(), 2);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ mod collapse_identifiers;
|
|||
mod isolate_english;
|
||||
mod markdown;
|
||||
mod mask;
|
||||
mod oops_all_headings;
|
||||
mod org_mode;
|
||||
mod plain_english;
|
||||
|
||||
|
|
@ -12,6 +13,7 @@ pub use collapse_identifiers::CollapseIdentifiers;
|
|||
pub use isolate_english::IsolateEnglish;
|
||||
pub use markdown::{Markdown, MarkdownOptions};
|
||||
pub use mask::Mask;
|
||||
pub use oops_all_headings::OopsAllHeadings;
|
||||
pub use org_mode::OrgMode;
|
||||
pub use plain_english::PlainEnglish;
|
||||
|
||||
|
|
|
|||
49
harper-core/src/parsers/oops_all_headings.rs
Normal file
49
harper-core/src/parsers/oops_all_headings.rs
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
use crate::{Span, Token, TokenKind};
|
||||
|
||||
use super::Parser;
|
||||
|
||||
/// A parser that wraps another, forcing the entirety of the document to be composed of headings.
|
||||
pub struct OopsAllHeadings<P: Parser + 'static> {
|
||||
inner: P,
|
||||
}
|
||||
|
||||
impl<P: Parser + 'static> OopsAllHeadings<P> {
|
||||
pub fn new(inner: P) -> Self {
|
||||
Self { inner }
|
||||
}
|
||||
}
|
||||
|
||||
impl<P: Parser + 'static> Parser for OopsAllHeadings<P> {
|
||||
fn parse(&self, source: &[char]) -> Vec<Token> {
|
||||
let inner = self.inner.parse(source);
|
||||
let mut output = Vec::with_capacity(inner.capacity());
|
||||
|
||||
output.push(Token {
|
||||
span: Span::default(),
|
||||
kind: TokenKind::HeadingStart,
|
||||
});
|
||||
|
||||
let mut iter = inner.into_iter().peekable();
|
||||
|
||||
while let Some(tok) = iter.next() {
|
||||
let heading_start = if tok.kind.is_paragraph_break()
|
||||
&& iter.peek().is_some_and(|t| !t.kind.is_heading_start())
|
||||
{
|
||||
Some(Token {
|
||||
span: Span::new_with_len(tok.span.end, 0),
|
||||
kind: TokenKind::HeadingStart,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
output.push(tok);
|
||||
|
||||
if let Some(extra) = heading_start {
|
||||
output.push(extra);
|
||||
}
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
}
|
||||
|
|
@ -2,20 +2,43 @@ use super::{Pattern, WordSet};
|
|||
|
||||
pub struct ModalVerb {
|
||||
inner: WordSet,
|
||||
include_common_errors: bool,
|
||||
}
|
||||
|
||||
impl Default for ModalVerb {
|
||||
fn default() -> Self {
|
||||
let (words, include_common_errors) = Self::init(false);
|
||||
Self {
|
||||
inner: words,
|
||||
include_common_errors,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ModalVerb {
|
||||
fn init(include_common_errors: bool) -> (WordSet, bool) {
|
||||
let modals = [
|
||||
"can", "can't", "could", "may", "might", "must", "shall", "should", "will", "would",
|
||||
"ought", "dare",
|
||||
"can", "can't", "could", "may", "might", "must", "shall", "shan't", "should", "will",
|
||||
"won't", "would", "ought", "dare",
|
||||
];
|
||||
|
||||
let mut words = WordSet::new(&modals);
|
||||
modals.iter().for_each(|word| {
|
||||
words.add(&format!("{word}n't"));
|
||||
if include_common_errors {
|
||||
words.add(&format!("{word}nt"));
|
||||
}
|
||||
});
|
||||
words.add("cannot");
|
||||
(words, include_common_errors)
|
||||
}
|
||||
|
||||
Self { inner: words }
|
||||
pub fn with_common_errors() -> Self {
|
||||
let (words, _) = Self::init(true);
|
||||
Self {
|
||||
inner: words,
|
||||
include_common_errors: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,9 @@ use crate::Token;
|
|||
|
||||
use super::Pattern;
|
||||
|
||||
/// A pattern that uses primitive syntax-tree heuristics to locate nominal phrases.
|
||||
/// Given that it does not take context into account, it is not recommended for new code.
|
||||
/// Please prefer [`DictWordMetadata::np_member`](crate::DictWordMetadata::np_member).
|
||||
#[derive(Default)]
|
||||
pub struct NominalPhrase;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
use super::{MutableDictionary, WordId};
|
||||
use fst::{IntoStreamer, Map as FstMap, Streamer, map::StreamWithState};
|
||||
use hashbrown::HashMap;
|
||||
use lazy_static::lazy_static;
|
||||
use levenshtein_automata::{DFA, LevenshteinAutomatonBuilder};
|
||||
use std::borrow::Cow;
|
||||
|
|
@ -150,28 +151,35 @@ impl Dictionary for FstDictionary {
|
|||
let upper_dists = stream_distances_vec(&mut word_indexes_stream, &dfa);
|
||||
let lower_dists = stream_distances_vec(&mut word_indexes_lowercase_stream, &dfa_lowercase);
|
||||
|
||||
let mut merged = Vec::with_capacity(upper_dists.len());
|
||||
// Merge the two results, keeping the smallest distance when both DFAs match.
|
||||
// The uppercase and lowercase searches can return different result counts, so
|
||||
// we can't simply zip the vectors without losing matches.
|
||||
let mut merged = Vec::with_capacity(upper_dists.len().max(lower_dists.len()));
|
||||
let mut best_distances = HashMap::<u64, u8>::new();
|
||||
|
||||
// Merge the two results
|
||||
for ((i_u, dist_u), (i_l, dist_l)) in upper_dists.into_iter().zip(lower_dists.into_iter()) {
|
||||
let (chosen_index, edit_distance) = if dist_u <= dist_l {
|
||||
(i_u, dist_u)
|
||||
} else {
|
||||
(i_l, dist_l)
|
||||
};
|
||||
|
||||
let (word, metadata) = &self.words[chosen_index as usize];
|
||||
for (idx, dist) in upper_dists.into_iter().chain(lower_dists.into_iter()) {
|
||||
best_distances
|
||||
.entry(idx)
|
||||
.and_modify(|existing| *existing = (*existing).min(dist))
|
||||
.or_insert(dist);
|
||||
}
|
||||
|
||||
for (index, edit_distance) in best_distances {
|
||||
let (word, metadata) = &self.words[index as usize];
|
||||
merged.push(FuzzyMatchResult {
|
||||
word,
|
||||
edit_distance,
|
||||
metadata: Cow::Borrowed(metadata),
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
merged.sort_unstable_by_key(|v| v.word);
|
||||
merged.dedup_by_key(|v| v.word);
|
||||
merged.sort_unstable_by_key(|v| v.edit_distance);
|
||||
// Ignore exact matches
|
||||
merged.retain(|v| v.edit_distance > 0);
|
||||
merged.sort_unstable_by(|a, b| {
|
||||
a.edit_distance
|
||||
.cmp(&b.edit_distance)
|
||||
.then_with(|| a.word.cmp(b.word))
|
||||
});
|
||||
merged.truncate(max_results);
|
||||
|
||||
merged
|
||||
|
|
|
|||
|
|
@ -342,7 +342,7 @@ fn score_suggestion(misspelled_word: &[char], sug: &FuzzyMatchResult) -> i32 {
|
|||
score -= 6;
|
||||
}
|
||||
|
||||
if sug.edit_distance == 2 {
|
||||
if sug.edit_distance <= 2 {
|
||||
if is_ei_ie_misspelling(misspelled_word, sug.word) {
|
||||
score -= 11;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ use crate::TokenKind;
|
|||
use hashbrown::HashSet;
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
use crate::Punctuation;
|
||||
use crate::spell::Dictionary;
|
||||
use crate::{CharStringExt, Document, TokenStringExt, parsers::Parser};
|
||||
|
||||
|
|
@ -27,17 +28,46 @@ pub fn make_title_case_chars(
|
|||
make_title_case(document.get_tokens(), source.as_slice(), dict)
|
||||
}
|
||||
|
||||
pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) -> Vec<char> {
|
||||
pub fn try_make_title_case(
|
||||
toks: &[Token],
|
||||
source: &[char],
|
||||
dict: &impl Dictionary,
|
||||
) -> Option<Vec<char>> {
|
||||
if toks.is_empty() {
|
||||
return Vec::new();
|
||||
return None;
|
||||
}
|
||||
|
||||
let start_index = toks.first().unwrap().span.start;
|
||||
let relevant_text = toks.span().unwrap().get_content(source);
|
||||
|
||||
let mut word_likes = toks.iter_word_likes().enumerate().peekable();
|
||||
let mut output = toks.span().unwrap().get_content(source).to_vec();
|
||||
let mut word_likes = toks.iter_word_like_indices().enumerate().peekable();
|
||||
|
||||
let mut output = None;
|
||||
let mut previous_word_index = 0;
|
||||
|
||||
// Checks if the output if the provided char is different from the source. If so, it will
|
||||
// set the output. The goal here is to avoid allocating if no edits must be made.
|
||||
let mut set_output_char = |idx: usize, new_char: char| {
|
||||
if output
|
||||
.as_ref()
|
||||
.is_some_and(|o: &Vec<char>| o[idx] != new_char)
|
||||
|| relevant_text[idx] != new_char
|
||||
{
|
||||
if output.is_none() {
|
||||
output = Some(relevant_text.to_vec())
|
||||
}
|
||||
|
||||
let Some(mutable) = &mut output else {
|
||||
panic!("We just set output to `Some`. This should be impossible.");
|
||||
};
|
||||
|
||||
mutable[idx] = new_char;
|
||||
}
|
||||
};
|
||||
|
||||
while let Some((index, word_idx)) = word_likes.next() {
|
||||
let word = &toks[word_idx];
|
||||
|
||||
while let Some((index, word)) = word_likes.next() {
|
||||
if let Some(Some(metadata)) = word.kind.as_word()
|
||||
&& metadata.is_proper_noun()
|
||||
{
|
||||
|
|
@ -46,59 +76,87 @@ pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary)
|
|||
|
||||
if let Some(correct_caps) = dict.get_correct_capitalization_of(orig_text) {
|
||||
// It should match the dictionary verbatim
|
||||
output[word.span.start - start_index..word.span.end - start_index]
|
||||
.iter_mut()
|
||||
.enumerate()
|
||||
.for_each(|(idx, c)| *c = correct_caps[idx]);
|
||||
for (i, c) in correct_caps.iter().enumerate() {
|
||||
set_output_char(word.span.start - start_index + i, *c);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let should_capitalize = should_capitalize_token(word, source, dict)
|
||||
// Capitalize the first word following a colon to match Chicago style.
|
||||
let is_after_colon = toks[previous_word_index..word_idx]
|
||||
.iter()
|
||||
.any(|tok| matches!(tok.kind, TokenKind::Punctuation(Punctuation::Colon)));
|
||||
|
||||
let should_capitalize = is_after_colon
|
||||
|| should_capitalize_token(word, source)
|
||||
|| index == 0
|
||||
|| word_likes.peek().is_none();
|
||||
|
||||
if should_capitalize {
|
||||
output[word.span.start - start_index] =
|
||||
output[word.span.start - start_index].to_ascii_uppercase();
|
||||
set_output_char(
|
||||
word.span.start - start_index,
|
||||
relevant_text[word.span.start - start_index].to_ascii_uppercase(),
|
||||
);
|
||||
} else {
|
||||
// The whole word should be lowercase.
|
||||
for i in word.span {
|
||||
output[i - start_index] = output[i - start_index].to_ascii_lowercase();
|
||||
set_output_char(
|
||||
i - start_index,
|
||||
relevant_text[i - start_index].to_ascii_lowercase(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
previous_word_index = word_idx
|
||||
}
|
||||
|
||||
if let Some(output) = &output
|
||||
&& output.as_slice() == relevant_text
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
|
||||
pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) -> Vec<char> {
|
||||
try_make_title_case(toks, source, dict)
|
||||
.unwrap_or_else(|| toks.span().unwrap_or_default().get_content(source).to_vec())
|
||||
}
|
||||
|
||||
/// Determines whether a token should be capitalized.
|
||||
/// Is not responsible for capitalization requirements that are dependent on token position.
|
||||
fn should_capitalize_token(tok: &Token, source: &[char], dict: &impl Dictionary) -> bool {
|
||||
fn should_capitalize_token(tok: &Token, source: &[char]) -> bool {
|
||||
match &tok.kind {
|
||||
TokenKind::Word(Some(metadata)) => {
|
||||
// Only specific conjunctions are not capitalized.
|
||||
lazy_static! {
|
||||
static ref SPECIAL_CONJUNCTIONS: HashSet<Vec<char>> =
|
||||
["and", "but", "for", "or", "nor"]
|
||||
["and", "but", "for", "or", "nor", "as"]
|
||||
.iter()
|
||||
.map(|v| v.chars().collect())
|
||||
.collect();
|
||||
static ref SPECIAL_ARTICLES: HashSet<Vec<char>> = ["a", "an", "the"]
|
||||
.iter()
|
||||
.map(|v| v.chars().collect())
|
||||
.collect();
|
||||
}
|
||||
|
||||
let chars = tok.span.get_content(source);
|
||||
let chars_lower = chars.to_lower();
|
||||
|
||||
let mut metadata = Cow::Borrowed(metadata);
|
||||
|
||||
if let Some(metadata_lower) = dict.get_word_metadata(&chars_lower) {
|
||||
metadata = Cow::Owned(metadata.clone().or(&metadata_lower));
|
||||
}
|
||||
let metadata = Cow::Borrowed(metadata);
|
||||
|
||||
let is_short_preposition = metadata.preposition && tok.span.len() <= 4;
|
||||
|
||||
if chars_lower.as_ref() == ['a', 'l', 'l'] {
|
||||
return true;
|
||||
}
|
||||
|
||||
!is_short_preposition
|
||||
&& !metadata.is_determiner()
|
||||
&& !metadata.is_non_possessive_determiner()
|
||||
&& !SPECIAL_CONJUNCTIONS.contains(chars_lower.as_ref())
|
||||
&& !SPECIAL_ARTICLES.contains(chars_lower.as_ref())
|
||||
}
|
||||
_ => true,
|
||||
}
|
||||
|
|
@ -243,4 +301,208 @@ mod tests {
|
|||
"United States"
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn keeps_decimal() {
|
||||
assert_eq!(
|
||||
make_title_case_str(
|
||||
"harper turns 1.0 today",
|
||||
&PlainEnglish,
|
||||
&FstDictionary::curated()
|
||||
),
|
||||
"Harper Turns 1.0 Today"
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fixes_odd_capitalized_proper_nouns() {
|
||||
assert_eq!(
|
||||
make_title_case_str(
|
||||
"i spoke at wordcamp u.s. in 2025",
|
||||
&PlainEnglish,
|
||||
&FstDictionary::curated()
|
||||
),
|
||||
"I Spoke at WordCamp U.S. in 2025",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fixes_your_correctly() {
|
||||
assert_eq!(
|
||||
make_title_case_str(
|
||||
"it is not your friend",
|
||||
&PlainEnglish,
|
||||
&FstDictionary::curated()
|
||||
),
|
||||
"It Is Not Your Friend",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_old_man_and_the_sea() {
|
||||
assert_eq!(
|
||||
make_title_case_str(
|
||||
"the old man and the sea",
|
||||
&PlainEnglish,
|
||||
&FstDictionary::curated()
|
||||
),
|
||||
"The Old Man and the Sea",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_great_story_with_subtitle() {
|
||||
assert_eq!(
|
||||
make_title_case_str(
|
||||
"the great story: a tale of two cities",
|
||||
&PlainEnglish,
|
||||
&FstDictionary::curated()
|
||||
),
|
||||
"The Great Story: A Tale of Two Cities",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_lantern_and_moths() {
|
||||
assert_eq!(
|
||||
make_title_case_str(
|
||||
"lantern flickered; moths began their worship",
|
||||
&PlainEnglish,
|
||||
&FstDictionary::curated()
|
||||
),
|
||||
"Lantern Flickered; Moths Began Their Worship",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_static_with_ghosts() {
|
||||
assert_eq!(
|
||||
make_title_case_str(
|
||||
"static filled the room with ghosts",
|
||||
&PlainEnglish,
|
||||
&FstDictionary::curated()
|
||||
),
|
||||
"Static Filled the Room with Ghosts",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_glass_trembled_before_thunder() {
|
||||
assert_eq!(
|
||||
make_title_case_str(
|
||||
"glass trembled before thunder arrived.",
|
||||
&PlainEnglish,
|
||||
&FstDictionary::curated()
|
||||
),
|
||||
"Glass Trembled Before Thunder Arrived.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_hepatitis_b_shots() {
|
||||
assert_eq!(
|
||||
make_title_case_str(
|
||||
"an end to hepatitis b shots for all newborns",
|
||||
&PlainEnglish,
|
||||
&FstDictionary::curated()
|
||||
),
|
||||
"An End to Hepatitis B Shots for All Newborns",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_trump_approval_rating() {
|
||||
assert_eq!(
|
||||
make_title_case_str(
|
||||
"trump's approval rating dips as views of his handling of the economy sour",
|
||||
&PlainEnglish,
|
||||
&FstDictionary::curated()
|
||||
),
|
||||
"Trump's Approval Rating Dips as Views of His Handling of the Economy Sour",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_last_door() {
|
||||
assert_eq!(
|
||||
make_title_case_str("the last door", &PlainEnglish, &FstDictionary::curated()),
|
||||
"The Last Door",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_midnight_river() {
|
||||
assert_eq!(
|
||||
make_title_case_str("midnight river", &PlainEnglish, &FstDictionary::curated()),
|
||||
"Midnight River",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_a_quiet_room() {
|
||||
assert_eq!(
|
||||
make_title_case_str("a quiet room", &PlainEnglish, &FstDictionary::curated()),
|
||||
"A Quiet Room",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_broken_map() {
|
||||
assert_eq!(
|
||||
make_title_case_str("broken map", &PlainEnglish, &FstDictionary::curated()),
|
||||
"Broken Map",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_fire_in_autumn() {
|
||||
assert_eq!(
|
||||
make_title_case_str("fire in autumn", &PlainEnglish, &FstDictionary::curated()),
|
||||
"Fire in Autumn",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_hidden_path() {
|
||||
assert_eq!(
|
||||
make_title_case_str("the hidden path", &PlainEnglish, &FstDictionary::curated()),
|
||||
"The Hidden Path",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_under_blue_skies() {
|
||||
assert_eq!(
|
||||
make_title_case_str("under blue skies", &PlainEnglish, &FstDictionary::curated()),
|
||||
"Under Blue Skies",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_lost_and_found() {
|
||||
assert_eq!(
|
||||
make_title_case_str("lost and found", &PlainEnglish, &FstDictionary::curated()),
|
||||
"Lost and Found",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_silent_watcher() {
|
||||
assert_eq!(
|
||||
make_title_case_str(
|
||||
"the silent watcher",
|
||||
&PlainEnglish,
|
||||
&FstDictionary::curated()
|
||||
),
|
||||
"The Silent Watcher",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_winter_road() {
|
||||
assert_eq!(
|
||||
make_title_case_str("winter road", &PlainEnglish, &FstDictionary::curated()),
|
||||
"Winter Road",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ pub enum TokenKind {
|
|||
Unlintable,
|
||||
ParagraphBreak,
|
||||
Regexish,
|
||||
HeadingStart,
|
||||
}
|
||||
|
||||
impl TokenKind {
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue