mirror of https://github.com/Automattic/harper.git
synced 2025-12-23 08:48:15 +00:00

Compare commits (178 commits)
Commit SHA1s:

073f60638b ce223e73ad 31c6ecfad5 e784f9b212 d47b045dca 78e519daa4
07e911e043 d2fb82c670 441a5b153b ec8c8d3552 850eb4cb60 f3dfb234c4
913debf140 fbda09642e 38c7a45533 1186655360 c160137d43 33b60f93e8
704a647f26 a168e3691d 18129baa35 0dc6738905 688b2a5191 5e5a0a473a
6a97ff125c 639a4b153d 433829f3b8 c9f4b7c49c 5fc3171a1c 531bd25fc5
bcd0a1c463 5c09e09c10 f15778ed28 6ac8406e29 309719994c 86edc6e34f
3fa856a68c 71739177a4 dedfa31b98 37ae9b4c69 2330456311 69db96b370
9eefe7336e e6579442f3 b642c99212 6b8ffd29f5 66f3e84357 4b8c619bb7
782ab95200 9c2892e265 02bd5bc5d7 bfaa324c07 1301d20400 e9eb82944e
cc15110b69 d0d0ac72df a437d1abd6 3def332854 2910319fb5 483c306110
c246654906 94a83d7e78 72fd089764 e40967cf3e fc87bd518b bd319e7130
8cfb684724 1ac108fb2c 150656ee42 aa2cdd4029 70d5861b94 30ff6e1f22
8dd02aad4e d31396e7a3 5d7bbeefc9 edff925df0 611475176d 212508e1d9
63914aa23e 4c539bc75f 5ac4c0ad2e 538b21fb0c 8fff5e9a56 5d3beaad4f
a8e09d5635 235a4016ea 43e74ebdf3 addb5bf441 d9299b5e63 0889bf0c82
d7aa283759 256cf92196 90efaf6430 25f2c371d9 a03e4903c8 a5539e4a48
2a954e7014 20fa392611 7b77964ae1 230e142404 c4f04faa41 0d8cedbf59
38bb4abf5e c808ed9e58 4742064f36 b1e50b2dc6 1377ab51a5 da9e2ba0d6
8ff2fe4b79 f111abec9a 3b0a5025a7 e819fbbf1e fcc8b7bbe2 09b6ca771c
bf00f274f8 8a5bf92b00 31eaa758c9 7080884073 9736f5782e 26760275cb
fac069997b eac891213f 42b005d256 56a81b8d00 ad429ad0d0 5a21c570fc
36df3b9505 4f58178880 f35dfd62b0 3b1b126dd7 589ca33466 9aba2e4e4e
cd92514808 4b78139dc5 d3eb71e7e6 56ede304d6 b5893dda88 3f26fdfa8b
462ad601f0 029903cfd5 a5eb934d4f e38cf90610 64017175b9 b284ac04bd
fb656ff077 2c37f83c69 214b2b2f63 953b6e635a 2514f881c8 ed21fe2d12
6367706408 d323c39b50 9526d55d17 e4955aeb54 06384d8ad9 15497e5960
dde4920ff7 9f331b4fff aacc73150e 64e95d8010 10592c8fc5 6714b73d15
a125a397ef 19832424d6 e27bab5bc2 33e06c9b0f a8e2ba8a75 68add530af
a6c7b34c04 08675f287d dc751b7aae 1e1bbfdeba c38185015b 3162a60b81
34f7a8321d 76d15dcbcd c50dfcdc40 f465bec169
439 changed files with 29472 additions and 15648 deletions
.gitattributes (vendored): 2 changes

@@ -1 +1,3 @@
 * text=auto eol=lf
+quill_simple.html linguist-generated
+github_textarea.html linguist-generated
.github/workflows/build_web.yml (vendored): 2 changes

@@ -22,7 +22,7 @@ jobs:
       - uses: redhat-actions/buildah-build@v2
         with:
           image: web
-          layers: false
+          layers: true
           containerfiles: |
             Dockerfile
           build-args: |
.github/workflows/just_checks.yml (vendored): 2 deletions

@@ -26,7 +26,6 @@ jobs:
           test-chrome-plugin,
           test-firefox-plugin,
           test-obsidian,
-          build-obsidian,
         ]
     steps:
       - uses: actions/checkout@v4

@@ -40,7 +39,6 @@ jobs:
        uses: actions/setup-node@v4
        with:
          node-version-file: ".node-version"
-         package-manager-cache: false
      - name: Enable Corepack
        run: corepack enable
      - name: Rust Cache
Cargo.lock (generated): 301 changed lines, all routine dependency updates.

Version bumps:

| Package(s) | Old | New |
|---|---|---|
| burn, burn-autodiff, burn-candle, burn-common, burn-core, burn-cubecl, burn-cubecl-fusion, burn-cuda, burn-dataset, burn-derive, burn-fusion, burn-ir, burn-ndarray, burn-nn, burn-optim, burn-rocm, burn-router, burn-store, burn-tensor, burn-train, burn-wgpu | 0.19.0 | 0.19.1 |
| clap, clap_builder | 4.5.51 | 4.5.53 |
| criterion | 0.7.0 | 0.8.1 |
| criterion-plot | 0.6.0 | 0.8.1 |
| cubecl, cubecl-common, cubecl-convolution, cubecl-core, cubecl-cpp, cubecl-cpu, cubecl-cuda, cubecl-hip, cubecl-ir, cubecl-macros, cubecl-matmul, cubecl-opt, cubecl-quant, cubecl-random, cubecl-reduce, cubecl-runtime, cubecl-std, cubecl-wgpu | 0.8.0 | 0.8.1 |
| cudarc | 0.17.5 | 0.17.7 |
| harper-brill, harper-comments, harper-core, harper-html, harper-ink, harper-jjdescription, harper-literate-haskell, harper-ls, harper-pos-utils, harper-python, harper-stats, harper-tree-sitter, harper-typst | 0.71.0 | 1.3.0 |
| hashbrown | 0.16.0 | 0.16.1 |
| indexmap | 2.12.0 | 2.12.1 |
| open | 5.3.2 | 5.3.3 |
| tracing | 0.1.41 | 0.1.43 |
| tracing-attributes | 0.1.30 | 0.1.31 |
| tracing-core | 0.1.34 | 0.1.35 |
| tracing-subscriber | 0.3.20 | 0.3.22 |
| tree-sitter-bash | 0.25.0 | 0.25.1 |
| unicode-script | 0.5.7 | 0.5.8 |
| uuid | 1.18.1 | 1.19.0 |

New packages:

| Package | Version | Notes |
|---|---|---|
| alloca | 0.4.0 | new dependency of criterion 0.8 |
| fuzz | 0.0.0 | new workspace crate; depends on harper-comments, harper-core, harper-html, harper-literate-haskell, harper-typst, and libfuzzer-sys |
| libfuzzer-sys | 0.4.10 | depends on arbitrary and cc |
| page_size | 0.6.0 | new dependency of criterion 0.8; depends on libc and winapi |

Dependency-list changes: harper-cli gains dirs 6.0.0 and strum_macros; the hashbrown entries under harper-cli, harper-core, harper-pos-utils, indexmap, and lru move from 0.16.0 to 0.16.1; hashbrown and uuid gain a serde_core dependency.
Cargo.toml

@@ -1,5 +1,5 @@
 [workspace]
-members = ["harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html", "harper-literate-haskell", "harper-typst", "harper-stats", "harper-pos-utils", "harper-brill", "harper-ink", "harper-python", "harper-jjdescription"]
+members = ["harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html", "harper-literate-haskell", "harper-typst", "harper-stats", "harper-pos-utils", "harper-brill", "harper-ink", "harper-python", "harper-jjdescription", "fuzz"]
 resolver = "2"

 [profile.test]
Dockerfile

@@ -27,7 +27,11 @@ WORKDIR /usr/build/
 COPY . .
 COPY --from=wasm-build /usr/build/harper-wasm/pkg /usr/build/harper-wasm/pkg

-RUN pnpm install --shamefully-hoist
+RUN pnpm install --engine-strict=false --shamefully-hoist

 WORKDIR /usr/build/packages/components
+RUN pnpm install --engine-strict=false --shamefully-hoist
 RUN pnpm build

 WORKDIR /usr/build/packages/harper.js

@@ -37,7 +41,7 @@ WORKDIR /usr/build/packages/lint-framework
 RUN pnpm build

 WORKDIR /usr/build/packages/web
-RUN pnpm install --shamefully-hoist
+RUN pnpm install --engine-strict=false --shamefully-hoist
 RUN pnpm build

 FROM node:${NODE_VERSION}
fuzz/.gitignore (vendored, new file, 4 lines)

target
corpus
artifacts
coverage
fuzz/Cargo.toml (new file, 52 lines)

[package]
name = "fuzz"
version = "0.0.0"
publish = false
edition = "2024"

[package.metadata]
cargo-fuzz = true

[dependencies]
libfuzzer-sys = "0.4"

harper-core = { path = "../harper-core" }
harper-typst = { path = "../harper-typst" }
harper-literate-haskell = { path = "../harper-literate-haskell" }
harper-html = { path = "../harper-html" }
harper-comments = { path = "../harper-comments" }

[[bin]]
name = "fuzz_harper_typst"
path = "fuzz_targets/fuzz_harper_typst.rs"
test = false
doc = false
bench = false

[[bin]]
name = "fuzz_harper_literate_haskell"
path = "fuzz_targets/fuzz_harper_literate_haskell.rs"
test = false
doc = false
bench = false

[[bin]]
name = "fuzz_harper_html"
path = "fuzz_targets/fuzz_harper_html.rs"
test = false
doc = false
bench = false

[[bin]]
name = "fuzz_harper_comment"
path = "fuzz_targets/fuzz_harper_comment.rs"
test = false
doc = false
bench = false

[[bin]]
name = "fuzz_harper_core_markdown"
path = "fuzz_targets/fuzz_harper_core_markdown.rs"
test = false
doc = false
bench = false
fuzz/README.md (new file, 34 lines)

# cargo-fuzz targets

## Setup

Follow the rust-fuzz [setup guide](https://rust-fuzz.github.io/book/cargo-fuzz/setup.html).
You need a nightly toolchain and the cargo-fuzz plugin.

Simple installation steps:

- `rustup install nightly`
- `cargo install cargo-fuzz`

## Adding a new fuzzing target

To add a new target, run `cargo fuzz add $TARGET_NAME`.

## Doing a fuzzing run

If possible, prefill the `fuzz/corpus/$TARGET_NAME` directory with appropriate examples to speed up fuzzing.
The fuzzer should be coverage-aware, so providing a well-formed input document to fuzzing targets that only expect a string as input can speed things up a lot.

Then run `cargo +nightly fuzz run $TARGET_NAME -- -timeout=$TIMEOUT`.

The timeout flag accepts a timeout in seconds, after which a long-running test case is aborted.
It should be set to a low number so that endless loops and deep recursion in parsers are reported quickly.

A normal fuzzing run continues until a crash is found.

Alternatively, to run all the fuzzing targets at once: `cargo +nightly fuzz list | parallel -j0 cargo +nightly fuzz run {} -- -timeout=$TIMEOUT`

## Minifying a test case

Once the fuzzer finds a crash, we probably want to minify the result.
This can be done with `CARGO_PROFILE_RELEASE_LTO=false cargo +nightly fuzz tmin $TARGET $TEST_CASE_PATH`.
fuzz/fuzz_targets/fuzz_harper_comment.rs (new file, 76 lines)

#![no_main]

use harper_core::parsers::{MarkdownOptions, StrParser};
use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured};
use libfuzzer_sys::fuzz_target;

#[derive(Debug)]
struct Language(String);

const LANGUAGES: [&str; 32] = [
    "cmake",
    "cpp",
    "csharp",
    "c",
    "dart",
    "go",
    "haskell",
    "javascriptreact",
    "javascript",
    "java",
    "kotlin",
    "lua",
    "nix",
    "php",
    "python",
    "ruby",
    "rust",
    "scala",
    "shellscript",
    "solidity",
    "swift",
    "toml",
    "typescriptreact",
    "typescript",
    "clojure",
    "go",
    "lua",
    "java",
    "javascriptreact",
    "typescript",
    "typescriptreact",
    "solidity",
];

impl<'a> Arbitrary<'a> for Language {
    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
        let &lang = u.choose(&LANGUAGES)?;
        Ok(Language(lang.to_owned()))
    }
}

#[derive(Debug)]
struct Input {
    language: Language,
    text: String,
}

impl<'a> Arbitrary<'a> for Input {
    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
        let (language, text) = Arbitrary::arbitrary(u)?;
        Ok(Input { language, text })
    }

    fn arbitrary_take_rest(u: Unstructured<'a>) -> Result<Self> {
        let (language, text) = Arbitrary::arbitrary_take_rest(u)?;
        Ok(Input { language, text })
    }
}

fuzz_target!(|data: Input| {
    let opts = MarkdownOptions::default();
    let parser = harper_comments::CommentParser::new_from_language_id(&data.language.0, opts);
    if let Some(parser) = parser {
        let _res = parser.parse_str(&data.text);
    }
});
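The comment target above feeds the parser structured input rather than raw bytes: the `Arbitrary` impls decode the fuzzer's byte stream into a valid language ID plus a text body, so every execution reaches `CommentParser` instead of bailing on an unknown language. Below is a minimal standalone sketch of that decoding step, assuming the `arbitrary` crate (which libfuzzer-sys re-exports) as a direct dependency; the names are illustrative, not part of Harper.

use arbitrary::{Arbitrary, Result, Unstructured};

const LANGUAGES: [&str; 3] = ["rust", "go", "lua"];

#[derive(Debug)]
struct Input {
    language: &'static str,
    text: String,
}

impl<'a> Arbitrary<'a> for Input {
    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
        // `choose` consumes a few bytes of fuzzer input to pick a variant,
        // so the language is always valid instead of random garbage.
        let &language = u.choose(&LANGUAGES)?;
        let text = String::arbitrary(u)?;
        Ok(Input { language, text })
    }
}

fn main() -> Result<()> {
    // In a real run, libfuzzer supplies the raw bytes; here we fake them.
    let mut u = Unstructured::new(b"\x02some text to parse");
    println!("{:?}", Input::arbitrary(&mut u)?);
    Ok(())
}

Implementing `arbitrary_take_rest` as well, as the real target does, lets the final field consume all remaining bytes rather than a length-prefixed slice, which wastes less of the fuzzer's input.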
fuzz/fuzz_targets/fuzz_harper_core_markdown.rs (new file, 10 lines)

#![no_main]

use harper_core::parsers::{Markdown, MarkdownOptions, StrParser};
use libfuzzer_sys::fuzz_target;

fuzz_target!(|data: &str| {
    let opts = MarkdownOptions::default();
    let parser = Markdown::new(opts);
    let _res = parser.parse_str(data);
});
fuzz/fuzz_targets/fuzz_harper_html.rs (new file, 9 lines)

#![no_main]

use harper_core::parsers::StrParser;
use libfuzzer_sys::fuzz_target;

fuzz_target!(|data: &str| {
    let parser = harper_html::HtmlParser::default();
    let _res = parser.parse_str(data);
});
fuzz/fuzz_targets/fuzz_harper_literate_haskell.rs (new file, 9 lines)

#![no_main]

// use harper_core::parsers::StrParser;
use libfuzzer_sys::fuzz_target;

fuzz_target!(|_data: &str| {
    // TODO: figure out how to create a literate haskell parser
    // let _res = typst.parse_str(&data);
});
fuzz/fuzz_targets/fuzz_harper_typst.rs (new file, 9 lines)

#![no_main]

use harper_core::parsers::StrParser;
use libfuzzer_sys::fuzz_target;

fuzz_target!(|data: &str| {
    let typst = harper_typst::Typst;
    let _res = typst.parse_str(data);
});
harper-brill/Cargo.toml

@@ -1,13 +1,13 @@
 [package]
 name = "harper-brill"
-version = "0.71.0"
+version = "1.3.0"
 edition = "2024"
 description = "The language checker for developers."
 license = "Apache-2.0"
 repository = "https://github.com/automattic/harper"

 [dependencies]
-harper-pos-utils = { path = "../harper-pos-utils/", version = "0.71.0" }
+harper-pos-utils = { path = "../harper-pos-utils/", version = "1.0.0" }
 lazy_static = "1.5.0"
 serde_json = "1.0.145"
harper-brill/src/lib.rs

@@ -1,10 +1,11 @@
-use harper_pos_utils::{BurnChunkerCpu, CachedChunker};
 use lazy_static::lazy_static;
 use std::num::NonZero;
 use std::rc::Rc;
 use std::sync::Arc;

-pub use harper_pos_utils::{BrillChunker, BrillTagger, Chunker, FreqDict, Tagger, UPOS};
+pub use harper_pos_utils::{
+    BrillChunker, BrillTagger, BurnChunkerCpu, CachedChunker, Chunker, FreqDict, Tagger, UPOS,
+};

 const BRILL_TAGGER_SOURCE: &str = include_str!("../trained_tagger_model.json");

@@ -16,6 +17,8 @@ fn uncached_brill_tagger() -> BrillTagger<FreqDict> {
     serde_json::from_str(BRILL_TAGGER_SOURCE).unwrap()
 }

+/// Get a copy of a shared, lazily-initialized [`BrillTagger`]. There will be only one instance
+/// per-process.
 pub fn brill_tagger() -> Arc<BrillTagger<FreqDict>> {
     (*BRILL_TAGGER).clone()
 }

@@ -30,6 +33,8 @@ fn uncached_brill_chunker() -> BrillChunker {
     serde_json::from_str(BRILL_CHUNKER_SOURCE).unwrap()
 }

+/// Get a copy of a shared, lazily-initialized [`BrillChunker`]. There will be only one instance
+/// per-process.
 pub fn brill_chunker() -> Arc<BrillChunker> {
     (*BRILL_CHUNKER).clone()
 }

@@ -48,6 +53,9 @@ fn uncached_burn_chunker() -> CachedChunker<BurnChunkerCpu> {
     )
 }

+/// Get a copy of a shared, lazily-initialized [`BurnChunkerCpu`]. There will be only one instance
+/// per-process. Since neural net inference is extremely expensive, this chunker is memoized as
+/// well.
pub fn burn_chunker() -> Rc<CachedChunker<BurnChunkerCpu>> {
     (BURN_CHUNKER).with(|c| c.clone())
 }
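The new doc comments codify the crate's sharing scheme: each model is deserialized once behind a lazy initializer, and callers receive cheap reference-counted clones (`Arc` for the taggers and chunker, and `Rc` for the burn chunker, whose `.with(...)` accessor suggests it lives in a `thread_local!`). Below is a minimal sketch of the `Arc` variant of that pattern; `Model` and `load_model` are hypothetical stand-ins rather than Harper APIs, with the `lazy_static` crate assumed as in harper-brill itself.

use lazy_static::lazy_static;
use std::sync::Arc;

struct Model {
    name: String,
}

fn load_model() -> Model {
    // Stands in for the expensive deserialization done once per process.
    Model { name: "tagger".to_owned() }
}

lazy_static! {
    static ref MODEL: Arc<Model> = Arc::new(load_model());
}

/// Every caller gets a handle to the same instance; cloning an `Arc`
/// only bumps a reference count.
fn model() -> Arc<Model> {
    (*MODEL).clone()
}

fn main() {
    let a = model();
    let b = model();
    assert!(Arc::ptr_eq(&a, &b));
    println!("shared model: {}", a.name);
}

For a non-thread-safe handle like the burn chunker's `Rc`, the same idea applies per thread rather than per process.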
harper-cli/Cargo.toml

@@ -8,20 +8,23 @@ repository = "https://github.com/automattic/harper"
 [dependencies]
 anyhow = "1.0.100"
 ariadne = "0.4.1"
-clap = { version = "4.5.51", features = ["derive", "std", "string"], default-features = false }
-harper-stats = { path = "../harper-stats", version = "0.71.0" }
+clap = { version = "4.5.53", features = ["derive", "std", "string"], default-features = false }
+harper-stats = { path = "../harper-stats", version = "1.0.0" }
 dirs = "6.0.0"
-harper-literate-haskell = { path = "../harper-literate-haskell", version = "0.71.0" }
-harper-python = { path = "../harper-python", version = "0.71.0" }
-harper-core = { path = "../harper-core", version = "0.71.0" }
-harper-pos-utils = { path = "../harper-pos-utils", version = "0.71.0", features = [] }
-harper-comments = { path = "../harper-comments", version = "0.71.0" }
-harper-typst = { path = "../harper-typst", version = "0.71.0" }
-hashbrown = "0.16.0"
+harper-literate-haskell = { path = "../harper-literate-haskell", version = "1.0.0" }
+harper-python = { path = "../harper-python", version = "1.0.0" }
+harper-core = { path = "../harper-core", version = "1.0.0" }
+harper-pos-utils = { path = "../harper-pos-utils", version = "1.0.0", features = [] }
+harper-comments = { path = "../harper-comments", version = "1.0.0" }
+harper-typst = { path = "../harper-typst", version = "1.0.0" }
+hashbrown = "0.16.1"
 rayon = "1.11.0"
 serde = { version = "1.0.228", features = ["derive"] }
 serde_json = "1.0.145"
 strum = "0.27.2"
-harper-ink = { version = "0.71.0", path = "../harper-ink" }
+strum_macros = "0.27.2"
+harper-ink = { version = "1.0.0", path = "../harper-ink" }
 either = "1.15.0"

 [features]
 default = []
@@ -26,7 +26,7 @@ impl Annotation {

     /// Gets an iterator of annotation `Label` from the given document.
     ///
-    /// This is similar to [`self::iter_from_document()`], but this additionally converts
+    /// This is similar to [`Self::iter_from_document`], but this additionally converts
     /// the [`Annotation`] into [`ariadne::Label`] for convenience.
     pub(super) fn iter_labels_from_document<'inpt_id>(
         annotation_type: AnnotationType,
harper-cli/src/input.rs

@@ -12,20 +12,33 @@ use harper_core::{
 pub(super) enum Input {
     /// File (path) input.
     File(PathBuf),
+    /// Directory (path) input.
+    Dir(PathBuf),
     /// Direct text input, via the command line.
     Text(String),
+    /// Standard input (stdin).
+    Stdin(String),
 }
 impl Input {
     /// Loads the contained file/string into a conventional format. Returns a `Result` containing
     /// a tuple of a `Document` and its corresponding source text as a string.
     pub(super) fn load(
         &self,
+        batch_mode: bool,
         markdown_options: MarkdownOptions,
         dictionary: &impl Dictionary,
-    ) -> anyhow::Result<(Document, String)> {
+    ) -> anyhow::Result<(Option<Document>, String)> {
         match self {
-            Input::File(file) => super::load_file(file, markdown_options, dictionary),
-            Input::Text(s) => Ok((Document::new(s, &PlainEnglish, dictionary), s.clone())),
+            Input::File(file) => super::load_file(
+                file,
+                Some(&self.get_identifier()),
+                batch_mode,
+                markdown_options,
+                dictionary,
+            ),
+            Input::Dir(dir) => Ok((None, dir.display().to_string())),
+            Input::Text(s) => Ok((Some(Document::new(s, &PlainEnglish, dictionary)), s.clone())),
+            Input::Stdin(s) => Ok((Some(Document::new(s, &PlainEnglish, dictionary)), s.clone())),
         }
     }

@@ -37,7 +50,11 @@ impl Input {
             Input::File(file) => file
                 .file_name()
                 .map_or(Cow::from("<file>"), |file_name| file_name.to_string_lossy()),
-            Input::Text(_) => Cow::from("<input>"),
+            Input::Dir(dir) => dir
+                .file_name()
+                .map_or(Cow::from("<dir>"), |dir_name| dir_name.to_string_lossy()),
+            Input::Text(_) => Cow::from("<text>"),
+            Input::Stdin(_) => Cow::from("<stdin>"),
         }
     }

@@ -46,7 +63,7 @@ impl Input {
     pub(super) fn try_from_stdin() -> anyhow::Result<Self> {
         let mut buf = String::new();
         std::io::stdin().lock().read_to_string(&mut buf)?;
-        Ok(Self::from(buf))
+        Ok(Self::Stdin(buf))
     }
 }
 // This allows this type to be directly used with clap as an argument.

@@ -60,6 +77,11 @@ impl From<String> for Input {
         {
             // Input is a valid file path.
             Self::File(input_string.into())
+        } else if let Ok(metadata) = std::fs::metadata(&input_string)
+            && metadata.is_dir()
+        {
+            // Input is a valid directory path.
+            Self::Dir(input_string.into())
         } else {
             // Input is not a valid file path, we assume it's intended to be a string.
             Self::Text(input_string)
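The updated `From<String>` impl classifies a CLI argument by probing the filesystem: an existing file becomes `Input::File`, an existing directory becomes the new `Input::Dir`, and anything else is treated as literal text. Below is a minimal standalone sketch of that classification order, using a simplified stand-in for harper-cli's enum; the let-chain in the real code is replaced by match guards here for brevity.

use std::path::PathBuf;

#[derive(Debug)]
enum Input {
    File(PathBuf),
    Dir(PathBuf),
    Text(String),
}

fn classify(input_string: String) -> Input {
    match std::fs::metadata(&input_string) {
        Ok(metadata) if metadata.is_file() => Input::File(input_string.into()),
        Ok(metadata) if metadata.is_dir() => Input::Dir(input_string.into()),
        // Not a path on disk, so treat it as text to lint directly.
        _ => Input::Text(input_string),
    }
}

fn main() {
    println!("{:?}", classify("Cargo.toml".to_owned()));
    println!("{:?}", classify("src".to_owned()));
    println!("{:?}", classify("this is just a sentence".to_owned()));
}

Note that stdin is deliberately excluded from this fallback chain; it is only constructed explicitly by `try_from_stdin`, which is what makes the `<stdin>` identifier reliable.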
770
harper-cli/src/lint.rs
Normal file
770
harper-cli/src/lint.rs
Normal file
|
|
@ -0,0 +1,770 @@
|
|||
use std::collections::BTreeMap;
|
||||
use std::path::{Component, Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use std::{fs, process};
|
||||
|
||||
use ariadne::{Color, Fmt, Label, Report, ReportKind, Source};
|
||||
use hashbrown::HashMap;
|
||||
use rayon::prelude::*;
|
||||
|
||||
use harper_core::{
|
||||
linting::{Lint, LintGroup, LintGroupConfig, LintKind},
|
||||
parsers::MarkdownOptions,
|
||||
spell::{Dictionary, MergedDictionary, MutableDictionary},
|
||||
{Dialect, DictWordMetadata, Document, Token, TokenKind, remove_overlaps_map},
|
||||
};
|
||||
|
||||
use crate::input::Input;
|
||||
|
||||
/// Sync version of harper-ls/src/dictionary_io@load_dict
|
||||
fn load_dict(path: &Path) -> anyhow::Result<MutableDictionary> {
|
||||
let str = fs::read_to_string(path)?;
|
||||
|
||||
let mut dict = MutableDictionary::new();
|
||||
dict.extend_words(
|
||||
str.lines()
|
||||
.map(|l| (l.chars().collect::<Vec<_>>(), DictWordMetadata::default())),
|
||||
);
|
||||
|
||||
Ok(dict)
|
||||
}
|
||||
|
||||
/// Path version of harper-ls/src/dictionary_io@file_dict_name
|
||||
fn file_dict_name(path: &Path) -> PathBuf {
|
||||
let mut rewritten = String::new();
|
||||
|
||||
for seg in path.components() {
|
||||
if !matches!(seg, Component::RootDir) {
|
||||
rewritten.push_str(&seg.as_os_str().to_string_lossy());
|
||||
rewritten.push('%');
|
||||
}
|
||||
}
|
||||
|
||||
rewritten.into()
|
||||
}
|
||||
|
||||
pub struct LintOptions<'a> {
|
||||
pub count: bool,
|
||||
pub ignore: &'a Option<Vec<String>>,
|
||||
pub only: &'a Option<Vec<String>>,
|
||||
pub dialect: Dialect,
|
||||
}
|
||||
enum ReportStyle {
|
||||
FullAriadneLintReport,
|
||||
BriefCountsOnlyLintReport,
|
||||
}
|
||||
|
||||
struct InputInfo<'a> {
|
||||
parent_input_id: &'a str,
|
||||
input: Input,
|
||||
}
|
||||
|
||||
struct InputJob {
|
||||
batch_mode: bool,
|
||||
parent_input_id: String,
|
||||
input: Input,
|
||||
}
|
||||
|
||||
trait InputPath {
|
||||
fn format_path(&self) -> String;
|
||||
}
|
||||
|
||||
impl InputPath for InputInfo<'_> {
|
||||
fn format_path(&self) -> String {
|
||||
let child = self.input.get_identifier();
|
||||
if self.parent_input_id.is_empty() {
|
||||
child.into_owned()
|
||||
} else {
|
||||
format!("\x1b[33m{}/\x1b[0m{}", self.parent_input_id, child)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn lint(
|
||||
markdown_options: MarkdownOptions,
|
||||
curated_dictionary: Arc<dyn Dictionary>,
|
||||
inputs: Vec<Input>,
|
||||
lint_options: LintOptions,
|
||||
user_dict_path: PathBuf,
|
||||
// TODO workspace_dict_path?
|
||||
file_dict_path: PathBuf,
|
||||
) -> anyhow::Result<()> {
|
||||
let LintOptions {
|
||||
count,
|
||||
ignore,
|
||||
only,
|
||||
dialect,
|
||||
} = lint_options;
|
||||
|
||||
// Zero or more inputs, default to stdin if not provided
|
||||
let all_user_inputs = if inputs.is_empty() {
|
||||
vec![Input::try_from_stdin().unwrap()]
|
||||
} else {
|
||||
inputs
|
||||
};
|
||||
|
||||
// Filter out any rules from ignore/only lists that don't exist in the current config
|
||||
// Uses a cached config to avoid expensive linter initialization
|
||||
let config = LintGroupConfig::new_curated();
|
||||
let mut ignore = ignore.clone();
|
||||
let mut only = only.clone();
|
||||
|
||||
if let Some(ref mut only) = only {
|
||||
only.retain(|rule| {
|
||||
if !config.has_rule(rule) {
|
||||
eprintln!("Warning: Cannot enable unknown rule '{}'.", rule);
|
||||
return false;
|
||||
}
|
||||
true
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(ref mut ignore) = ignore {
|
||||
ignore.retain(|rule| {
|
||||
if !config.has_rule(rule) {
|
||||
eprintln!("Warning: Cannot disable unknown rule '{}'.", rule);
|
||||
return false;
|
||||
}
|
||||
true
|
||||
});
|
||||
}
|
||||
|
||||
// Create merged dictionary with base dictionary
|
||||
let mut curated_plus_user_dict = MergedDictionary::new();
|
||||
curated_plus_user_dict.add_dictionary(Arc::new(curated_dictionary));
|
||||
|
||||
let user_dict_msg = match load_dict(&user_dict_path) {
|
||||
Ok(user_dict) => {
|
||||
curated_plus_user_dict.add_dictionary(Arc::new(user_dict));
|
||||
"Using"
|
||||
}
|
||||
Err(_) => "There is no",
|
||||
};
|
||||
println!(
|
||||
"Note: {user_dict_msg} user dictionary at {}",
|
||||
user_dict_path.display()
|
||||
);
|
||||
|
||||
// The lint stats for all files
|
||||
let mut all_lint_kinds: HashMap<LintKind, usize> = HashMap::new();
|
||||
let mut all_rules: HashMap<String, usize> = HashMap::new();
|
||||
let mut all_lint_kind_rule_pairs: HashMap<(LintKind, String), usize> = HashMap::new();
|
||||
let mut all_spellos: HashMap<String, usize> = HashMap::new();
|
||||
|
||||
// Convert the 'count' flag into a ReportStyle enum
|
||||
let report_mode = match count {
|
||||
true => ReportStyle::BriefCountsOnlyLintReport,
|
||||
false => ReportStyle::FullAriadneLintReport,
|
||||
};
|
||||
|
||||
let mut input_jobs = Vec::new();
|
||||
for user_input in all_user_inputs {
|
||||
let (batch_mode, maybe_dir) = match &user_input {
|
||||
Input::Dir(dir) => (true, std::fs::read_dir(dir).ok()),
|
||||
_ => (false, None),
|
||||
};
|
||||
|
||||
let parent_input_id = if batch_mode {
|
||||
user_input.get_identifier().to_string()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
if let Some(dir) = maybe_dir {
|
||||
let mut entries: Vec<_> = dir
|
||||
.filter_map(Result::ok)
|
||||
.filter(|entry| entry.file_type().map(|ft| !ft.is_dir()).unwrap_or(false))
|
||||
.collect();
|
||||
|
||||
entries.sort_by_key(|entry| entry.file_name());
|
||||
|
||||
for entry in entries {
|
||||
input_jobs.push(InputJob {
|
||||
batch_mode,
|
||||
parent_input_id: parent_input_id.clone(),
|
||||
input: Input::File(entry.path()),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
input_jobs.push(InputJob {
|
||||
batch_mode,
|
||||
parent_input_id,
|
||||
input: user_input.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let per_input_results = {
|
||||
let run_job = |job: InputJob| {
|
||||
let InputJob {
|
||||
batch_mode,
|
||||
parent_input_id,
|
||||
input,
|
||||
} = job;
|
||||
let parent_id_ref = parent_input_id.as_str();
|
||||
|
||||
lint_one_input(
|
||||
// Common properties of harper-cli
|
||||
markdown_options,
|
||||
&curated_plus_user_dict,
|
||||
// Passed from the user for the `lint` subcommand
|
||||
&report_mode,
|
||||
LintOptions {
|
||||
count,
|
||||
ignore: &ignore,
|
||||
only: &only,
|
||||
dialect,
|
||||
},
|
||||
&file_dict_path,
|
||||
// Are we linting multiple inputs inside a directory?
|
||||
batch_mode,
|
||||
// The current input to be linted
|
||||
InputInfo {
|
||||
parent_input_id: parent_id_ref,
|
||||
input,
|
||||
},
|
||||
)
|
||||
};
|
||||
|
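// Editor's note: fan out only when there is more than one job (via Rayon's
// `into_par_iter`); a single input is linted inline on the current thread.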
||||
if input_jobs.len() > 1 {
|
||||
input_jobs.into_par_iter().map(run_job).collect::<Vec<_>>()
|
||||
} else {
|
||||
input_jobs.into_iter().map(run_job).collect::<Vec<_>>()
|
||||
}
|
||||
};
|
||||
|
||||
for lint_results in per_input_results {
|
||||
// Update the global stats
|
||||
for (kind, count) in lint_results.0 {
|
||||
*all_lint_kinds.entry(kind).or_insert(0) += count;
|
||||
}
|
||||
for (rule, count) in lint_results.1 {
|
||||
*all_rules.entry(rule).or_insert(0) += count;
|
||||
}
|
||||
for ((kind, rule), count) in lint_results.2 {
|
||||
*all_lint_kind_rule_pairs.entry((kind, rule)).or_insert(0) += count;
|
||||
}
|
||||
for (word, count) in lint_results.3 {
|
||||
*all_spellos.entry(word).or_insert(0) += count;
|
||||
}
|
||||
}
|
||||
|
||||
final_report(
|
||||
dialect,
|
||||
true,
|
||||
all_lint_kinds,
|
||||
all_rules,
|
||||
all_lint_kind_rule_pairs,
|
||||
all_spellos,
|
||||
);
|
||||
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
type LintKindCount = HashMap<LintKind, usize>;
|
||||
type LintRuleCount = HashMap<String, usize>;
|
||||
type LintKindRulePairCount = HashMap<(LintKind, String), usize>;
|
||||
type SpelloCount = HashMap<String, usize>;
|
||||
|
||||
struct FullInputInfo<'a> {
|
||||
input: InputInfo<'a>,
|
||||
doc: Document,
|
||||
source: String,
|
||||
}
|
||||
|
||||
fn lint_one_input(
|
||||
// Common properties of harper-cli
|
||||
markdown_options: MarkdownOptions,
|
||||
curated_plus_user_dict: &MergedDictionary,
|
||||
report_mode: &ReportStyle,
|
||||
// Options passed from the user specific to the `lint` subcommand
|
||||
lint_options: LintOptions,
|
||||
file_dict_path: &Path,
|
||||
// Are we linting multiple inputs?
|
||||
batch_mode: bool,
|
||||
// For the current input
|
||||
current: InputInfo,
|
||||
) -> (
|
||||
LintKindCount,
|
||||
LintRuleCount,
|
||||
LintKindRulePairCount,
|
||||
SpelloCount,
|
||||
) {
|
||||
let LintOptions {
|
||||
count: _,
|
||||
ignore,
|
||||
only,
|
||||
dialect,
|
||||
} = lint_options;
|
||||
|
||||
let mut lint_kinds: HashMap<LintKind, usize> = HashMap::new();
|
||||
let mut lint_rules: HashMap<String, usize> = HashMap::new();
|
||||
let mut lint_kind_rule_pairs: HashMap<(LintKind, String), usize> = HashMap::new();
|
||||
let mut spellos: HashMap<String, usize> = HashMap::new();
|
||||
|
||||
// Create a new merged dictionary for this input.
|
||||
let mut merged_dictionary = curated_plus_user_dict.clone();
|
||||
|
||||
// If processing a file, try to load its per-file dictionary
|
||||
if let Input::File(ref file) = current.input {
|
||||
let dict_path = file_dict_path.join(file_dict_name(file));
|
||||
if let Ok(file_dictionary) = load_dict(&dict_path) {
|
||||
merged_dictionary.add_dictionary(Arc::new(file_dictionary));
|
||||
println!(
|
||||
"{}: Note: Using per-file dictionary: {}",
|
||||
current.format_path(),
|
||||
dict_path.display()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
match current
|
||||
.input
|
||||
.load(batch_mode, markdown_options, &merged_dictionary)
|
||||
{
|
||||
Err(err) => eprintln!("{}", err),
|
||||
Ok((maybe_doc, source)) => {
|
||||
if let Some(doc) = maybe_doc {
|
||||
// Create the Lint Group from which we will lint this input, using the combined dictionary and the specified dialect
|
||||
let mut lint_group = LintGroup::new_curated(merged_dictionary.into(), dialect);
|
||||
|
||||
// Turn specified rules on or off
|
||||
configure_lint_group(&mut lint_group, only, ignore);
|
||||
|
||||
// Run the linter, getting back a map of rule name -> lints
|
||||
let mut named_lints = lint_group.organized_lints(&doc);
|
||||
|
||||
// Lint counts, for brief reporting
|
||||
let lint_count_before = named_lints.values().map(|v| v.len()).sum::<usize>();
|
||||
remove_overlaps_map(&mut named_lints);
|
||||
let lint_count_after = named_lints.values().map(|v| v.len()).sum::<usize>();
|
||||
|
||||
// Extract the lint kinds and rules etc. for reporting
|
||||
(lint_kinds, lint_rules) = count_lint_kinds_and_rules(&named_lints);
|
||||
lint_kind_rule_pairs = collect_lint_kind_rule_pairs(&named_lints);
|
||||
spellos = collect_spellos(&named_lints, doc.get_source());
|
||||
|
||||
single_input_report(
|
||||
&FullInputInfo {
|
||||
input: InputInfo {
|
||||
parent_input_id: current.parent_input_id,
|
||||
input: current.input,
|
||||
},
|
||||
doc,
|
||||
source,
|
||||
},
|
||||
// Linting results of this input
|
||||
&named_lints,
|
||||
(lint_count_before, lint_count_after),
|
||||
&lint_kinds,
|
||||
&lint_rules,
|
||||
// Reporting arguments
|
||||
batch_mode,
|
||||
report_mode,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(lint_kinds, lint_rules, lint_kind_rule_pairs, spellos)
|
||||
}
|
||||
|
||||
fn configure_lint_group(
|
||||
lint_group: &mut LintGroup,
|
||||
only: &Option<Vec<String>>,
|
||||
ignore: &Option<Vec<String>>,
|
||||
) {
|
||||
if let Some(rules) = only {
|
||||
lint_group.set_all_rules_to(Some(false));
|
||||
rules
|
||||
.iter()
|
||||
.for_each(|rule| lint_group.config.set_rule_enabled(rule, true));
|
||||
}
|
||||
|
||||
if let Some(rules) = ignore {
|
||||
rules
|
||||
.iter()
|
||||
.for_each(|rule| lint_group.config.set_rule_enabled(rule, false));
|
||||
}
|
||||
}
|
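// Editor's sketch (illustrative, not part of the original change): with
// `only`, every rule is first disabled and the listed ones re-enabled;
// `ignore` then switches rules off on top of whatever is enabled.
fn _configure_lint_group_example(lint_group: &mut LintGroup) {
    // Lint with nothing but the spell checker.
    configure_lint_group(lint_group, &Some(vec!["SpellCheck".to_string()]), &None);
}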
||||
|
||||
fn count_lint_kinds_and_rules(
|
||||
named_lints: &BTreeMap<String, Vec<Lint>>,
|
||||
) -> (HashMap<LintKind, usize>, HashMap<String, usize>) {
|
||||
let mut kinds = HashMap::new();
|
||||
let mut rules = HashMap::new();
|
||||
|
||||
for (rule_name, lints) in named_lints {
|
||||
lints
|
||||
.iter()
|
||||
.for_each(|lint| *kinds.entry(lint.lint_kind).or_insert(0) += 1);
|
||||
|
||||
if !lints.is_empty() {
|
||||
*rules.entry(rule_name.to_string()).or_insert(0) += lints.len();
|
||||
}
|
||||
}
|
||||
|
||||
(kinds, rules)
|
||||
}
|
||||
|
||||
fn collect_lint_kind_rule_pairs(
|
||||
named_lints: &BTreeMap<String, Vec<Lint>>,
|
||||
) -> HashMap<(LintKind, String), usize> {
|
||||
let mut pairs = HashMap::new();
|
||||
|
||||
for (rule_name, lints) in named_lints {
|
||||
for lint in lints {
|
||||
pairs
|
||||
.entry((lint.lint_kind, rule_name.to_string()))
|
||||
.and_modify(|count| *count += 1)
|
||||
.or_insert(1);
|
||||
}
|
||||
}
|
||||
|
||||
pairs
|
||||
}
|
||||
|
||||
fn collect_spellos(
|
||||
named_lints: &BTreeMap<String, Vec<Lint>>,
|
||||
source: &[char],
|
||||
) -> HashMap<String, usize> {
|
||||
named_lints
|
||||
.get("SpellCheck")
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.map(|lint| lint.span.get_content_string(source))
|
||||
.fold(HashMap::new(), |mut acc, spello| {
|
||||
*acc.entry(spello).or_insert(0) += 1;
|
||||
acc
|
||||
})
|
||||
}
|
||||
|
||||
fn single_input_report(
|
||||
// Properties of the current input
|
||||
input_info: &FullInputInfo,
|
||||
// Linting results of this input
|
||||
named_lints: &BTreeMap<String, Vec<Lint>>,
|
||||
lint_count: (usize, usize),
|
||||
lint_kinds: &HashMap<LintKind, usize>,
|
||||
lint_rules: &HashMap<String, usize>,
|
||||
// Reporting parameters
|
||||
batch_mode: bool, // If true, we are processing multiple files, which affects how we report
|
||||
report_mode: &ReportStyle,
|
||||
) {
|
||||
let FullInputInfo { input, doc, source } = input_info;
|
||||
let (lint_count_before, lint_count_after) = lint_count;
|
||||
// The Ariadne report works poorly for files with very long lines, so suppress it unless only processing one file
|
||||
const MAX_LINE_LEN: usize = 150;
|
||||
|
||||
let mut report_mode = report_mode;
|
||||
let longest = find_longest_doc_line(doc.get_tokens());
|
||||
|
||||
if batch_mode
|
||||
&& longest > MAX_LINE_LEN
|
||||
&& matches!(report_mode, ReportStyle::FullAriadneLintReport)
|
||||
{
|
||||
report_mode = &ReportStyle::BriefCountsOnlyLintReport;
|
||||
println!(
|
||||
"{}: Longest line: {longest} exceeds max line length: {MAX_LINE_LEN}",
|
||||
input.format_path()
|
||||
);
|
||||
}
|
||||
|
||||
// Report the number of lints no matter what report mode we are in
|
||||
println!(
|
||||
"{}: {}",
|
||||
input.format_path(),
|
||||
match (lint_count_before, lint_count_after) {
|
||||
(0, _) => "No lints found".to_string(),
|
||||
(before, after) if before != after =>
|
||||
format!("{before} lints before overlap removal, {after} after"),
|
||||
(before, _) => format!("{before} lints"),
|
||||
}
|
||||
);
|
||||
|
||||
// If we are in Ariadne mode, print the report
|
||||
if matches!(report_mode, ReportStyle::FullAriadneLintReport) {
|
||||
let primary_color = Color::Magenta;
|
||||
|
||||
let input_identifier = input.input.get_identifier();
|
||||
|
||||
if lint_count_after != 0 {
|
||||
let mut report_builder = Report::build(ReportKind::Advice, &input_identifier, 0);
|
||||
|
||||
for (rule_name, lints) in named_lints {
|
||||
for lint in lints {
|
||||
let (r, g, b) = rgb_for_lint_kind(Some(&lint.lint_kind));
|
||||
report_builder = report_builder.with_label(
|
||||
Label::new((&input_identifier, lint.span.into()))
|
||||
.with_message(format!(
|
||||
"{}: {}",
|
||||
format_args!("[{}::{}]", lint.lint_kind, rule_name)
|
||||
.fg(ariadne::Color::Rgb(r, g, b)),
|
||||
lint.message
|
||||
))
|
||||
.with_color(primary_color),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let report = report_builder.finish();
|
||||
report.print((&input_identifier, Source::from(source))).ok();
|
||||
}
|
||||
}
|
||||
|
||||
// Print the more detailed counts for the lint kinds and then for the rules
|
||||
if !lint_kinds.is_empty() {
|
||||
let mut lint_kinds_vec: Vec<_> = lint_kinds.iter().collect();
|
||||
lint_kinds_vec.sort_by_key(|(lk, count)| (std::cmp::Reverse(**count), lk.to_string()));
|
||||
|
||||
let lk_vec: Vec<(Option<String>, String)> = lint_kinds_vec
|
||||
.into_iter()
|
||||
.map(|(lk, c)| {
|
||||
let (r, g, b) = rgb_for_lint_kind(Some(lk));
|
||||
(
|
||||
Some(format!("\x1b[38;2;{r};{g};{b}m")),
|
||||
format!("[{lk}: {c}]"),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
println!("lint kinds:");
|
||||
print_formatted_items(lk_vec);
|
||||
}
|
||||
|
||||
if !lint_rules.is_empty() {
|
||||
let mut rules_vec: Vec<_> = lint_rules.iter().collect();
|
||||
rules_vec.sort_by_key(|(rn, count)| (std::cmp::Reverse(**count), rn.to_string()));
|
||||
|
||||
let r_vec: Vec<(Option<String>, String)> = rules_vec
|
||||
.into_iter()
|
||||
.map(|(rn, c)| (None, format!("<{rn}: {c}>")))
|
||||
.collect();
|
||||
|
||||
println!("rules:");
|
||||
print_formatted_items(r_vec);
|
||||
}
|
||||
}
|
||||
|
||||
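/// Approximate the length in chars of the longest line in the document,
/// splitting on newline and paragraph-break tokens; unlintable spans are
/// skipped (see the TODO below).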
fn find_longest_doc_line(toks: &[Token]) -> usize {
|
||||
let mut longest_len_chars = 0;
|
||||
let mut curr_len_chars = 0;
|
||||
let mut current_line_start_tok_idx = 0;
|
||||
|
||||
for (idx, tok) in toks.iter().enumerate() {
|
||||
if matches!(tok.kind, TokenKind::Newline(_))
|
||||
|| matches!(tok.kind, TokenKind::ParagraphBreak)
|
||||
{
|
||||
if curr_len_chars > longest_len_chars {
|
||||
longest_len_chars = curr_len_chars;
|
||||
}
|
||||
curr_len_chars = 0;
|
||||
current_line_start_tok_idx = idx + 1;
|
||||
} else if matches!(tok.kind, TokenKind::Unlintable) {
|
||||
// TODO would be more accurate to scan for \n in the tok.span.get_content(src)
|
||||
} else {
|
||||
curr_len_chars += tok.span.len();
|
||||
}
|
||||
}
|
||||
|
||||
if curr_len_chars > longest_len_chars
|
||||
&& !toks.is_empty()
|
||||
&& current_line_start_tok_idx < toks.len()
|
||||
{
|
||||
longest_len_chars = curr_len_chars;
|
||||
}
|
||||
|
||||
longest_len_chars
|
||||
}
|
||||
|
||||
fn final_report(
|
||||
dialect: Dialect,
|
||||
batch_mode: bool,
|
||||
all_lint_kinds: HashMap<LintKind, usize>,
|
||||
all_rules: HashMap<String, usize>,
|
||||
all_lint_kind_rule_pairs: HashMap<(LintKind, String), usize>,
|
||||
all_spellos: HashMap<String, usize>,
|
||||
) {
|
||||
// The summary stats across all inputs, printed only when there are multiple inputs.
|
||||
if batch_mode {
|
||||
let mut all_files_lint_kind_counts_vec: Vec<(LintKind, _)> =
|
||||
all_lint_kinds.into_iter().collect();
|
||||
all_files_lint_kind_counts_vec
|
||||
.sort_by_key(|(lk, count)| (std::cmp::Reverse(*count), lk.to_string()));
|
||||
|
||||
let lint_kind_counts: Vec<(Option<String>, String)> = all_files_lint_kind_counts_vec
|
||||
.into_iter()
|
||||
.map(|(lint_kind, c)| {
|
||||
let (r, g, b) = rgb_for_lint_kind(Some(&lint_kind));
|
||||
(
|
||||
Some(format!("\x1b[38;2;{r};{g};{b}m")),
|
||||
format!("[{lint_kind}: {c}]"),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
if !lint_kind_counts.is_empty() {
|
||||
println!("All files lint kinds:");
|
||||
print_formatted_items(lint_kind_counts);
|
||||
}
|
||||
|
||||
let mut all_files_rule_name_counts_vec: Vec<_> = all_rules.into_iter().collect();
|
||||
all_files_rule_name_counts_vec
|
||||
.sort_by_key(|(rule_name, count)| (std::cmp::Reverse(*count), rule_name.to_string()));
|
||||
|
||||
let rule_name_counts: Vec<(Option<String>, String)> = all_files_rule_name_counts_vec
|
||||
.into_iter()
|
||||
.map(|(rule_name, count)| (None, format!("({rule_name}: {count})")))
|
||||
.collect();
|
||||
|
||||
if !rule_name_counts.is_empty() {
|
||||
println!("All files rule names:");
|
||||
print_formatted_items(rule_name_counts);
|
||||
}
|
||||
}
|
||||
|
||||
// The summary stats for each (lint kind, rule name) pair, printed whether there is one input or many.
|
||||
let mut lint_kind_rule_pairs: Vec<_> = all_lint_kind_rule_pairs.into_iter().collect();
|
||||
lint_kind_rule_pairs.sort_by(|a, b| {
|
||||
let (a, b) = ((&a.0, &a.1), (&b.0, &b.1));
|
||||
b.1.cmp(a.1)
|
||||
.then_with(|| a.0.0.to_string().cmp(&b.0.0.to_string()))
|
||||
.then_with(|| a.0.1.cmp(&b.0.1))
|
||||
});
|
||||
|
||||
// Format them using their colours
|
||||
let formatted_lint_kind_rule_pairs: Vec<(Option<String>, String)> = lint_kind_rule_pairs
|
||||
.into_iter()
|
||||
.map(|ele| {
|
||||
let (r, g, b) = rgb_for_lint_kind(Some(&ele.0.0));
|
||||
let ansi_prefix = format!("\x1b[38;2;{r};{g};{b}m");
|
||||
(
|
||||
Some(ansi_prefix),
|
||||
format!("«« {} {}·{} »»", ele.1, ele.0.0, ele.0.1),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
if !formatted_lint_kind_rule_pairs.is_empty() {
|
||||
// Print them with line wrapping
|
||||
print_formatted_items(formatted_lint_kind_rule_pairs);
|
||||
}
|
||||
|
||||
if !all_spellos.is_empty() {
|
||||
// Group by lowercase spelling while preserving original case and counts
|
||||
let mut grouped: HashMap<String, Vec<(String, usize)>> = HashMap::new();
|
||||
for (spelling, count) in all_spellos {
|
||||
grouped
|
||||
.entry(spelling.to_lowercase())
|
||||
.or_default()
|
||||
.push((spelling, count));
|
||||
}
|
||||
|
||||
// Create a vector of (lowercase_spelling, variants, total_count)
|
||||
let mut grouped_vec: Vec<_> = grouped
|
||||
.into_iter()
|
||||
.map(|(lower, variants)| {
|
||||
let total: usize = variants.iter().map(|(_, c)| c).sum();
|
||||
(lower, variants, total)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Sort by total count (descending), then by lowercase spelling
|
||||
grouped_vec.sort_by(|a, b| b.2.cmp(&a.2).then_with(|| a.0.cmp(&b.0)));
|
||||
|
||||
// Flatten the variants back out, but keep track of the group index for coloring
|
||||
let spelling_vec: Vec<(Option<String>, String)> = grouped_vec
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.flat_map(|(i, (_, variants, _))| {
|
||||
// Sort variants by count (descending) then by original spelling
|
||||
let mut variants = variants;
|
||||
variants.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
|
||||
|
||||
// Choose colour based on group index (rotating through three colours)
|
||||
let (r, g, b) = match i % 3 {
|
||||
0 => (180, 90, 150), // Magenta
|
||||
1 => (90, 180, 90), // Green
|
||||
_ => (90, 150, 180), // Cyan
|
||||
};
|
||||
let color = format!("\x1b[38;2;{};{};{}m", r, g, b);
|
||||
|
||||
variants
|
||||
.into_iter()
|
||||
.map(move |(spelling, c)| (Some(color.clone()), format!("(“{spelling}”: {c})")))
|
||||
})
|
||||
.collect();
|
||||
|
||||
println!("All files Spelling::SpellCheck (For dialect: {})", dialect);
|
||||
print_formatted_items(spelling_vec);
|
||||
}
|
||||
}
|
||||
|
||||
// Note: This must be kept synchronized with:
|
||||
// packages/lint-framework/src/lint/lintKindColor.ts
|
||||
// packages/web/src/lib/lintKindColor.ts
|
||||
// This can be removed when issue #1991 is resolved.
|
||||
fn lint_kind_to_rgb() -> &'static [(LintKind, (u8, u8, u8))] {
|
||||
&[
|
||||
(LintKind::Agreement, (0x22, 0x8B, 0x22)),
|
||||
(LintKind::BoundaryError, (0x8B, 0x45, 0x13)),
|
||||
(LintKind::Capitalization, (0x54, 0x0D, 0x6E)),
|
||||
(LintKind::Eggcorn, (0xFF, 0x8C, 0x00)),
|
||||
(LintKind::Enhancement, (0x0E, 0xAD, 0x69)),
|
||||
(LintKind::Formatting, (0x7D, 0x3C, 0x98)),
|
||||
(LintKind::Grammar, (0x9B, 0x59, 0xB6)),
|
||||
(LintKind::Malapropism, (0xC7, 0x15, 0x85)),
|
||||
(LintKind::Miscellaneous, (0x3B, 0xCE, 0xAC)),
|
||||
(LintKind::Nonstandard, (0x00, 0x8B, 0x8B)),
|
||||
(LintKind::Punctuation, (0xD4, 0x85, 0x0F)),
|
||||
(LintKind::Readability, (0x2E, 0x8B, 0x57)),
|
||||
(LintKind::Redundancy, (0x46, 0x82, 0xB4)),
|
||||
(LintKind::Regionalism, (0xC0, 0x61, 0xCB)),
|
||||
(LintKind::Repetition, (0x00, 0xA6, 0x7C)),
|
||||
(LintKind::Spelling, (0xEE, 0x42, 0x66)),
|
||||
(LintKind::Style, (0xFF, 0xD2, 0x3F)),
|
||||
(LintKind::Typo, (0xFF, 0x6B, 0x35)),
|
||||
(LintKind::Usage, (0x1E, 0x90, 0xFF)),
|
||||
(LintKind::WordChoice, (0x22, 0x8B, 0x22)),
|
||||
]
|
||||
}
|
||||
|
||||
fn rgb_for_lint_kind(olk: Option<&LintKind>) -> (u8, u8, u8) {
|
||||
olk.and_then(|lk| {
|
||||
lint_kind_to_rgb()
|
||||
.iter()
|
||||
.find(|(k, _)| k == lk)
|
||||
.map(|(_, color)| *color)
|
||||
})
|
||||
.unwrap_or((0, 0, 0))
|
||||
}
|
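// Editor's sketch (illustrative, not part of the original change): kinds
// found in the table map to their RGB triple; anything else is black.
#[cfg(test)]
mod rgb_for_lint_kind_example {
    use super::*;

    #[test]
    fn falls_back_to_black() {
        assert_eq!(rgb_for_lint_kind(Some(&LintKind::Spelling)), (0xEE, 0x42, 0x66));
        assert_eq!(rgb_for_lint_kind(None), (0, 0, 0));
    }
}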
||||
|
||||
fn print_formatted_items(items: impl IntoIterator<Item = (Option<String>, String)>) {
|
||||
let mut first_on_line = true;
|
||||
let mut len_so_far = 0;
|
||||
|
||||
for (ansi, text) in items {
|
||||
let text_len = text.len();
|
||||
|
||||
let mut len_to_add = !first_on_line as usize + text_len;
|
||||
|
||||
let mut before = "";
|
||||
if len_so_far + len_to_add > 120 {
|
||||
before = "\n";
|
||||
len_to_add -= 1; // a wrapped line starts without the leading space
|
||||
len_so_far = 0;
|
||||
} else if !first_on_line {
|
||||
before = " ";
|
||||
}
|
||||
|
||||
let (set, reset): (&str, &str) = if let Some(prefix) = ansi.as_ref() {
|
||||
(prefix, "\x1b[0m")
|
||||
} else {
|
||||
("", "")
|
||||
};
|
||||
print!("{}{}{}{}", before, set, text, reset);
|
||||
len_so_far += len_to_add;
|
||||
first_on_line = false;
|
||||
}
|
||||
println!();
|
||||
}
|
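// Editor's sketch (illustrative, not part of the original change): items are
// printed space-separated and wrapped before a line would pass 120 columns;
// the optional ANSI prefix is kept out of the width calculation.
fn _print_formatted_items_example() {
    print_formatted_items([
        (Some("\x1b[38;2;238;66;102m".to_string()), "[Spelling: 3]".to_string()),
        (None, "[Typo: 1]".to_string()),
    ]);
}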
||||
|
|
@ -1,24 +1,23 @@
|
|||
#![doc = include_str!("../README.md")]
|
||||
|
||||
use harper_core::spell::{Dictionary, FstDictionary, MergedDictionary, MutableDictionary, WordId};
|
||||
use harper_core::spell::{Dictionary, FstDictionary, MutableDictionary, WordId};
|
||||
use hashbrown::HashMap;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::path::{Component, Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use std::{fs, process};
|
||||
use std::path::{Path, PathBuf};
|
||||
// use std::sync::Arc;
|
||||
use std::fs;
|
||||
|
||||
use anyhow::anyhow;
|
||||
use ariadne::{Color, Label, Report, ReportKind, Source};
|
||||
use clap::Parser;
|
||||
use dirs::{config_dir, data_local_dir};
|
||||
use harper_comments::CommentParser;
|
||||
use harper_core::linting::{LintGroup, Linter};
|
||||
use harper_core::linting::LintGroup;
|
||||
use harper_core::parsers::{Markdown, MarkdownOptions, OrgMode, PlainEnglish};
|
||||
use harper_core::{
|
||||
CharStringExt, Dialect, DictWordMetadata, Document, Span, TokenKind, TokenStringExt,
|
||||
dict_word_metadata_orthography::OrthFlags, remove_overlaps,
|
||||
CharStringExt, Dialect, DictWordMetadata, Document, OrthFlags, Span, TokenKind, TokenStringExt,
|
||||
};
|
||||
use harper_ink::InkParser;
|
||||
use harper_literate_haskell::LiterateHaskellParser;
|
||||
|
|
@ -28,6 +27,7 @@ use harper_python::PythonParser;
|
|||
|
||||
use harper_stats::Stats;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
|
||||
mod input;
|
||||
use input::Input;
|
||||
|
|
@ -35,15 +35,19 @@ use input::Input;
|
|||
mod annotate_tokens;
|
||||
use annotate_tokens::{Annotation, AnnotationType};
|
||||
|
||||
mod lint;
|
||||
use crate::lint::lint;
|
||||
use lint::LintOptions;
|
||||
|
||||
/// A debugging tool for the Harper grammar checker.
|
||||
#[derive(Debug, Parser)]
|
||||
#[command(version, about)]
|
||||
enum Args {
|
||||
/// Lint a provided document.
|
||||
/// Lint provided documents.
|
||||
Lint {
|
||||
/// The texts, files, or directories you wish to grammar check. If none are provided, input will be read from
|
||||
/// standard input.
|
||||
input: Option<Input>,
|
||||
inputs: Vec<Input>,
|
||||
/// Whether to merely print out the number of errors encountered,
|
||||
/// without further details.
|
||||
#[arg(short, long)]
|
||||
|
|
@ -91,7 +95,12 @@ enum Args {
|
|||
annotation_type: AnnotationType,
|
||||
},
|
||||
/// Get the metadata associated with one or more words.
|
||||
Metadata { words: Vec<String> },
|
||||
Metadata {
|
||||
words: Vec<String>,
|
||||
/// Only show the part-of-speech flags and emojis, not the full JSON
|
||||
#[arg(short, long)]
|
||||
brief: bool,
|
||||
},
|
||||
/// Get all the forms of a word using the affixes.
|
||||
Forms { line: String },
|
||||
/// Emit a decompressed, line-separated list of the words in Harper's dictionary.
|
||||
|
|
@ -161,6 +170,11 @@ enum Args {
|
|||
/// The directory containing the dictionary and affixes.
|
||||
dir: PathBuf,
|
||||
},
|
||||
/// Audit the `dictionary.dict` file.
|
||||
AuditDictionary {
|
||||
/// The directory containing the dictionary and affixes.
|
||||
dir: PathBuf,
|
||||
},
|
||||
/// Emit a decompressed, line-separated list of the compounds in Harper's dictionary.
|
||||
/// Only emitted when there's either an open or hyphenated spelling.
|
||||
Compounds,
|
||||
|
|
@ -177,97 +191,41 @@ enum Args {
|
|||
fn main() -> anyhow::Result<()> {
|
||||
let args = Args::parse();
|
||||
let markdown_options = MarkdownOptions::default();
|
||||
let dictionary = FstDictionary::curated();
|
||||
let curated_dictionary = FstDictionary::curated();
|
||||
|
||||
match args {
|
||||
Args::Lint {
|
||||
input,
|
||||
inputs,
|
||||
count,
|
||||
ignore,
|
||||
only,
|
||||
dialect,
|
||||
user_dict_path,
|
||||
// TODO workspace_dict_path?
|
||||
file_dict_path,
|
||||
} => {
|
||||
// Try to read from standard input if `input` was not provided.
|
||||
let input = input.unwrap_or_else(|| Input::try_from_stdin().unwrap());
|
||||
|
||||
let mut merged_dict = MergedDictionary::new();
|
||||
merged_dict.add_dictionary(dictionary);
|
||||
|
||||
// Attempt to load user dictionary.
|
||||
match load_dict(&user_dict_path) {
|
||||
Ok(user_dict) => merged_dict.add_dictionary(Arc::new(user_dict)),
|
||||
Err(err) => println!("{}: {}", user_dict_path.display(), err),
|
||||
}
|
||||
|
||||
if let Input::File(ref file) = input {
|
||||
// Only attempt to load file dictionary if input is a file.
|
||||
let file_dict_path = file_dict_path.join(file_dict_name(file));
|
||||
match load_dict(&file_dict_path) {
|
||||
Ok(file_dict) => merged_dict.add_dictionary(Arc::new(file_dict)),
|
||||
Err(err) => println!("{}: {}", file_dict_path.display(), err),
|
||||
}
|
||||
}
|
||||
|
||||
// Load the file/text.
|
||||
let (doc, source) = input.load(markdown_options, &merged_dict)?;
|
||||
|
||||
let mut linter = LintGroup::new_curated(Arc::new(merged_dict), dialect);
|
||||
|
||||
if let Some(rules) = only {
|
||||
linter.set_all_rules_to(Some(false));
|
||||
|
||||
for rule in rules {
|
||||
linter.config.set_rule_enabled(rule, true);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(rules) = ignore {
|
||||
for rule in rules {
|
||||
linter.config.set_rule_enabled(rule, false);
|
||||
}
|
||||
}
|
||||
|
||||
let mut lints = linter.lint(&doc);
|
||||
|
||||
if count {
|
||||
println!("{}", lints.len());
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if lints.is_empty() {
|
||||
println!("No lints found");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
remove_overlaps(&mut lints);
|
||||
|
||||
let primary_color = Color::Magenta;
|
||||
|
||||
let input_identifier = input.get_identifier();
|
||||
|
||||
let mut report_builder = Report::build(ReportKind::Advice, &input_identifier, 0);
|
||||
|
||||
for lint in lints {
|
||||
report_builder = report_builder.with_label(
|
||||
Label::new((&input_identifier, lint.span.into()))
|
||||
.with_message(lint.message)
|
||||
.with_color(primary_color),
|
||||
);
|
||||
}
|
||||
|
||||
let report = report_builder.finish();
|
||||
report.print((&input_identifier, Source::from(source)))?;
|
||||
|
||||
process::exit(1)
|
||||
lint(
|
||||
markdown_options,
|
||||
curated_dictionary,
|
||||
inputs,
|
||||
LintOptions {
|
||||
count,
|
||||
ignore: &ignore,
|
||||
only: &only,
|
||||
dialect,
|
||||
},
|
||||
user_dict_path,
|
||||
// TODO workspace_dict_path?
|
||||
file_dict_path,
|
||||
)
|
||||
}
|
||||
Args::Parse { input } => {
|
||||
// Try to read from standard input if `input` was not provided.
|
||||
let input = input.unwrap_or_else(|| Input::try_from_stdin().unwrap());
|
||||
|
||||
// Load the file/text.
|
||||
let (doc, _) = input.load(markdown_options, &dictionary)?;
|
||||
let (doc, _) = input.load(false, markdown_options, &curated_dictionary)?;
|
||||
let doc = doc.expect("Failed to load document");
|
||||
|
||||
for token in doc.tokens() {
|
||||
let json = serde_json::to_string(&token)?;
|
||||
|
|
@ -284,7 +242,8 @@ fn main() -> anyhow::Result<()> {
|
|||
let input = input.unwrap_or_else(|| Input::try_from_stdin().unwrap());
|
||||
|
||||
// Load the file/text.
|
||||
let (doc, source) = input.load(markdown_options, &dictionary)?;
|
||||
let (doc, source) = input.load(false, markdown_options, &curated_dictionary)?;
|
||||
let doc = doc.expect("Failed to load document");
|
||||
|
||||
let primary_color = Color::Blue;
|
||||
let secondary_color = Color::Magenta;
|
||||
|
|
@ -333,7 +292,8 @@ fn main() -> anyhow::Result<()> {
|
|||
let input = input.unwrap_or_else(|| Input::try_from_stdin().unwrap());
|
||||
|
||||
// Load the file/text.
|
||||
let (doc, source) = input.load(markdown_options, &dictionary)?;
|
||||
let (doc, source) = input.load(false, markdown_options, &curated_dictionary)?;
|
||||
let doc = doc.expect("Failed to load document");
|
||||
|
||||
let input_identifier = input.get_identifier();
|
||||
|
||||
|
|
@ -357,7 +317,7 @@ fn main() -> anyhow::Result<()> {
|
|||
Args::Words => {
|
||||
let mut word_str = String::new();
|
||||
|
||||
for word in dictionary.words_iter() {
|
||||
for word in curated_dictionary.words_iter() {
|
||||
word_str.clear();
|
||||
word_str.extend(word);
|
||||
|
||||
|
|
@ -366,14 +326,41 @@ fn main() -> anyhow::Result<()> {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
Args::Metadata { words } => {
|
||||
let mut results = BTreeMap::new();
|
||||
Args::Metadata { words, brief } => {
|
||||
type PosPredicate = fn(&DictWordMetadata) -> bool;
|
||||
|
||||
const POS: &[(&str, PosPredicate)] = &[
|
||||
("N📦", |m| m.is_noun() && !m.is_proper_noun()),
|
||||
("O📛", DictWordMetadata::is_proper_noun),
|
||||
("V🏃", DictWordMetadata::is_verb),
|
||||
("J🌈", DictWordMetadata::is_adjective),
|
||||
("R🤷", DictWordMetadata::is_adverb),
|
||||
("C🔗", DictWordMetadata::is_conjunction),
|
||||
("D👉", DictWordMetadata::is_determiner),
|
||||
("P📥", |m| m.preposition),
|
||||
("I👤", DictWordMetadata::is_pronoun),
|
||||
];
|
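// Editor's note: each entry above packs an ASCII flag letter followed by an
// emoji; the filter/map below splits matching entries into two parallel
// strings (flags, emojis).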
||||
|
||||
for word in words {
|
||||
let metadata = dictionary.get_word_metadata_str(&word);
|
||||
results.insert(word, metadata);
|
||||
let meta = curated_dictionary.get_word_metadata_str(&word);
|
||||
let (flags, emojis) = meta.as_ref().map_or_else(
|
||||
|| (String::new(), String::new()),
|
||||
|md| {
|
||||
POS.iter()
|
||||
.filter(|&(_, pred)| pred(md))
|
||||
.map(|(syms, _)| {
|
||||
let mut ch = syms.chars();
|
||||
(ch.next().unwrap(), ch.next().unwrap())
|
||||
})
|
||||
.unzip()
|
||||
},
|
||||
);
|
||||
|
||||
let json = brief.then(String::new).unwrap_or_else(|| {
|
||||
format!("\n{}", serde_json::to_string_pretty(&meta).unwrap())
|
||||
});
|
||||
println!("{}: {} {}{}", word, flags, emojis, json);
|
||||
}
|
||||
let json = serde_json::to_string_pretty(&results).unwrap();
|
||||
println!("{json}");
|
||||
Ok(())
|
||||
}
|
||||
Args::SummarizeLintRecord { file } => {
|
||||
|
|
@ -456,7 +443,7 @@ fn main() -> anyhow::Result<()> {
|
|||
description: String,
|
||||
}
|
||||
|
||||
let linter = LintGroup::new_curated(dictionary, Dialect::American);
|
||||
let linter = LintGroup::new_curated(curated_dictionary, Dialect::American);
|
||||
|
||||
let default_config: HashMap<String, bool> =
|
||||
serde_json::from_str(&serde_json::to_string(&linter.config).unwrap()).unwrap();
|
||||
|
|
@ -478,7 +465,14 @@ fn main() -> anyhow::Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
Args::MineWords { file } => {
|
||||
let (doc, _source) = load_file(&file, MarkdownOptions::default(), &dictionary)?;
|
||||
let (doc, _source) = load_file(
|
||||
&file,
|
||||
None,
|
||||
false,
|
||||
MarkdownOptions::default(),
|
||||
&curated_dictionary,
|
||||
)?;
|
||||
let doc = doc.expect("Failed to load document");
|
||||
|
||||
let mut words = HashMap::new();
|
||||
|
||||
|
|
@ -548,8 +542,6 @@ fn main() -> anyhow::Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
Args::RenameFlag { old, new, dir } => {
|
||||
use serde_json::Value;
|
||||
|
||||
let dict_path = dir.join("dictionary.dict");
|
||||
let affixes_path = dir.join("annotations.json");
|
||||
|
||||
|
|
@ -668,11 +660,134 @@ fn main() -> anyhow::Result<()> {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
Args::AuditDictionary { dir } => {
|
||||
let annotations_path = dir.join("annotations.json");
|
||||
let annotations_content = fs::read_to_string(&annotations_path)
|
||||
.map_err(|e| anyhow!("Failed to read annotations: {e}"))?;
|
||||
let annotations_json: Value = serde_json::from_str(&annotations_content)
|
||||
.map_err(|e| anyhow!("Failed to parse annotations.json: {e}"))?;
|
||||
|
||||
let annotations = annotations_json
|
||||
.as_object()
|
||||
.ok_or_else(|| anyhow!("annotations.json is not an object"))?;
|
||||
|
||||
let (affixes, properties) = ["affixes", "properties"]
|
||||
.iter()
|
||||
.map(|key| {
|
||||
annotations
|
||||
.get(*key)
|
||||
.and_then(Value::as_object)
|
||||
.ok_or_else(|| {
|
||||
anyhow!("Missing or invalid '{key}' key in annotations.json")
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.map(|v| (v[0], v[1]))?;
|
||||
|
||||
let all_keys = affixes.keys().chain(properties.keys()).collect::<Vec<_>>();
|
||||
|
||||
let mut annotation_flag_count: HashMap<char, u32> = all_keys
|
||||
.iter()
|
||||
.filter_map(|key| key.chars().next()) // Get first char of each key
|
||||
.map(|c| (c, 0))
|
||||
.collect();
|
||||
|
||||
// let mut duplicate_flag_total = 0;
|
||||
let mut duplicate_flags = std::collections::HashMap::new();
|
||||
let mut unknown_flags = std::collections::HashMap::new();
|
||||
let mut unused_flag_total = 0;
|
||||
|
||||
let dict_path = dir.join("dictionary.dict");
|
||||
let dict_content = fs::read_to_string(&dict_path)
|
||||
.map_err(|e| anyhow!("Failed to read dictionary: {e}"))?;
|
||||
|
||||
for (line_num, line) in dict_content.lines().enumerate() {
|
||||
if line.is_empty()
|
||||
|| line.starts_with('#')
|
||||
|| line.chars().all(|c| c.is_ascii_digit())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
let (entry_part, _comment_part) =
|
||||
line.split_once('#').map_or((line, ""), |(e, c)| (e, c));
|
||||
|
||||
if let Some((lexeme, rest)) = entry_part.split_once('/') {
|
||||
let (annotation, _whitespace) = match rest.split_once([' ', '\t']) {
|
||||
Some((a, _)) => (a, &rest[a.len()..]),
|
||||
None => (rest, ""),
|
||||
};
|
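// Editor's note: a line such as `run/VG # hypothetical flags` yields the
// lexeme "run" and the annotation "VG"; each annotation char is one flag.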
||||
|
||||
let mut seen_flags = hashbrown::HashSet::new();
|
||||
|
||||
for flag in annotation.chars() {
|
||||
if !seen_flags.insert(flag) {
|
||||
eprintln!(
|
||||
"Warning: Line {}: Duplicate annotation flag '{}' in entry: {}/{}",
|
||||
line_num + 1,
|
||||
flag,
|
||||
lexeme,
|
||||
annotation
|
||||
);
|
||||
// duplicate_flag_total += 1;
|
||||
*duplicate_flags.entry(flag).or_insert(0) += 1;
|
||||
}
|
||||
if !annotation_flag_count.contains_key(&flag) {
|
||||
eprintln!(
|
||||
"Warning: Line {}: Unknown annotation flag '{}' in entry: {}/{}",
|
||||
line_num + 1,
|
||||
flag,
|
||||
lexeme,
|
||||
annotation
|
||||
);
|
||||
*unknown_flags.entry(flag).or_insert(0) += 1;
|
||||
} else {
|
||||
*annotation_flag_count.get_mut(&flag).unwrap() += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (flag, count) in annotation_flag_count {
|
||||
if count == 0 {
|
||||
eprintln!("Warning: Unused annotation flag '{}'", flag);
|
||||
unused_flag_total += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let duplicate_flag_total = duplicate_flags.values().sum::<usize>();
|
||||
let unknown_flag_total = unknown_flags.values().sum::<usize>();
|
||||
|
||||
if duplicate_flag_total > 0 || unknown_flag_total > 0 || unused_flag_total > 0 {
|
||||
eprintln!("\nAudit found issues:");
|
||||
if duplicate_flag_total > 0 {
|
||||
eprintln!(
|
||||
" - {} duplicate flags found in {} entries",
|
||||
duplicate_flags.len(),
|
||||
duplicate_flag_total
|
||||
);
|
||||
}
|
||||
if !unknown_flags.is_empty() {
|
||||
|
||||
eprintln!(
|
||||
" - {} unknown flags found in {} entries",
|
||||
unknown_flags.len(),
|
||||
unknown_flag_total
|
||||
);
|
||||
}
|
||||
if unused_flag_total > 0 {
|
||||
eprintln!(" - {} unused flags found", unused_flag_total);
|
||||
}
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Args::Compounds => {
|
||||
let mut compound_map: HashMap<String, Vec<String>> = HashMap::new();
|
||||
|
||||
// First pass: process open and hyphenated compounds
|
||||
for word in dictionary.words_iter() {
|
||||
for word in curated_dictionary.words_iter() {
|
||||
if !word.contains(&' ') && !word.contains(&'-') {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -691,7 +806,7 @@ fn main() -> anyhow::Result<()> {
|
|||
}
|
||||
|
||||
// Second pass: process closed compounds
|
||||
for word in dictionary.words_iter() {
|
||||
for word in curated_dictionary.words_iter() {
|
||||
if word.contains(&' ') || word.contains(&'-') {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -728,8 +843,8 @@ fn main() -> anyhow::Result<()> {
|
|||
| OrthFlags::UPPER_CAMEL;
|
||||
let mut processed_words = HashMap::new();
|
||||
let mut longest_word = 0;
|
||||
for word in dictionary.words_iter() {
|
||||
if let Some(metadata) = dictionary.get_word_metadata(word) {
|
||||
for word in curated_dictionary.words_iter() {
|
||||
if let Some(metadata) = curated_dictionary.get_word_metadata(word) {
|
||||
let orth = metadata.orth_info;
|
||||
let bits = orth.bits() & case_bitmask.bits();
|
||||
|
||||
|
|
@ -755,7 +870,8 @@ fn main() -> anyhow::Result<()> {
|
|||
// Get input from either file or direct text
|
||||
let input = match input {
|
||||
Some(Input::File(path)) => std::fs::read_to_string(path)?,
|
||||
Some(Input::Text(text)) => text,
|
||||
Some(Input::Dir(_)) => anyhow::bail!("Directory input is not supported"),
|
||||
Some(Input::Text(text)) | Some(Input::Stdin(text)) => text,
|
||||
None => std::io::read_to_string(std::io::stdin())?,
|
||||
};
|
||||
|
||||
|
|
@ -810,9 +926,11 @@ fn main() -> anyhow::Result<()> {
|
|||
|
||||
fn load_file(
|
||||
file: &Path,
|
||||
input_identifier: Option<&str>,
|
||||
batch_mode: bool,
|
||||
markdown_options: MarkdownOptions,
|
||||
dictionary: &impl Dictionary,
|
||||
) -> anyhow::Result<(Document, String)> {
|
||||
) -> anyhow::Result<(Option<Document>, String)> {
|
||||
let source = std::fs::read_to_string(file)?;
|
||||
|
||||
let parser: Box<dyn harper_core::parsers::Parser> = match file
|
||||
|
|
@ -828,19 +946,32 @@ fn load_file(
|
|||
Some("org") => Box::new(OrgMode),
|
||||
Some("typ") => Box::new(harper_typst::Typst),
|
||||
Some("py") | Some("pyi") => Box::new(PythonParser::default()),
|
||||
Some("txt") => Box::new(PlainEnglish),
|
||||
_ => {
|
||||
if let Some(comment_parser) = CommentParser::new_from_filename(file, markdown_options) {
|
||||
Box::new(comment_parser)
|
||||
} else {
|
||||
println!(
|
||||
"Warning: could not detect language ID; falling back to PlainEnglish parser."
|
||||
eprintln!(
|
||||
"{}Warning: Could not detect language ID; {}",
|
||||
input_identifier
|
||||
.map(|id| format!("{}: ", id))
|
||||
.unwrap_or_default(),
|
||||
if batch_mode {
|
||||
"skipping file."
|
||||
} else {
|
||||
"falling back to PlainEnglish parser."
|
||||
}
|
||||
);
|
||||
Box::new(PlainEnglish)
|
||||
if batch_mode {
|
||||
return Ok((None, source));
|
||||
} else {
|
||||
Box::new(PlainEnglish)
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok((Document::new(&source, &parser, dictionary), source))
|
||||
Ok((Some(Document::new(&source, &parser, dictionary)), source))
|
||||
}
|
||||
|
||||
/// Split a dictionary line into its word and annotation segments
|
||||
|
|
@ -868,30 +999,3 @@ fn print_word_derivations(word: &str, annot: &str, dictionary: &impl Dictionary)
|
|||
println!(" - {child_str}");
|
||||
}
|
||||
}
|
||||
|
||||
/// Sync version of harper-ls/src/dictionary_io@load_dict
|
||||
fn load_dict(path: &Path) -> anyhow::Result<MutableDictionary> {
|
||||
let str = fs::read_to_string(path)?;
|
||||
|
||||
let mut dict = MutableDictionary::new();
|
||||
dict.extend_words(
|
||||
str.lines()
|
||||
.map(|l| (l.chars().collect::<Vec<_>>(), DictWordMetadata::default())),
|
||||
);
|
||||
|
||||
Ok(dict)
|
||||
}
|
||||
|
||||
/// Path version of harper-ls/src/dictionary_io@file_dict_name
|
||||
fn file_dict_name(path: &Path) -> PathBuf {
|
||||
let mut rewritten = String::new();
|
||||
|
||||
for seg in path.components() {
|
||||
if !matches!(seg, Component::RootDir) {
|
||||
rewritten.push_str(&seg.as_os_str().to_string_lossy());
|
||||
rewritten.push('%');
|
||||
}
|
||||
}
|
||||
|
||||
rewritten.into()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "harper-comments"
|
||||
version = "0.71.0"
|
||||
version = "1.3.0"
|
||||
edition = "2024"
|
||||
description = "The language checker for developers."
|
||||
license = "Apache-2.0"
|
||||
|
|
@ -8,12 +8,12 @@ readme = "README.md"
|
|||
repository = "https://github.com/automattic/harper"
|
||||
|
||||
[dependencies]
|
||||
harper-core = { path = "../harper-core", version = "0.71.0" }
|
||||
harper-html = { path = "../harper-html", version = "0.71.0" }
|
||||
harper-tree-sitter = { path = "../harper-tree-sitter", version = "0.71.0" }
|
||||
harper-core = { path = "../harper-core", version = "1.0.0" }
|
||||
harper-html = { path = "../harper-html", version = "1.0.0" }
|
||||
harper-tree-sitter = { path = "../harper-tree-sitter", version = "1.0.0" }
|
||||
itertools = "0.14.0"
|
||||
tree-sitter = "0.25.10"
|
||||
tree-sitter-bash = "0.25.0"
|
||||
tree-sitter-bash = "0.25.1"
|
||||
tree-sitter-c = "0.24.1"
|
||||
tree-sitter-cmake = "0.7.1"
|
||||
tree-sitter-cpp = "0.23.4"
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ impl CommentParser {
|
|||
"dart" => harper_tree_sitter_dart::LANGUAGE,
|
||||
"go" => tree_sitter_go::LANGUAGE,
|
||||
"haskell" => tree_sitter_haskell::LANGUAGE,
|
||||
"daml" => tree_sitter_haskell::LANGUAGE,
|
||||
"java" => tree_sitter_java::LANGUAGE,
|
||||
"javascript" => tree_sitter_javascript::LANGUAGE,
|
||||
"javascriptreact" => tree_sitter_typescript::LANGUAGE_TSX,
|
||||
|
|
@ -89,6 +90,7 @@ impl CommentParser {
|
|||
"dart" => "dart",
|
||||
"go" => "go",
|
||||
"hs" => "haskell",
|
||||
"daml" => "daml",
|
||||
"java" => "java",
|
||||
"js" => "javascript",
|
||||
"jsx" => "javascriptreact",
|
||||
|
|
@ -119,3 +121,28 @@ impl Parser for CommentParser {
|
|||
self.inner.parse(source)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::CommentParser;
|
||||
use harper_core::parsers::{MarkdownOptions, StrParser};
|
||||
|
||||
#[test]
|
||||
fn hang() {
|
||||
use std::sync::mpsc::channel;
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
let (tx, rx) = channel::<()>();
|
||||
|
||||
let handle = thread::spawn(move || {
|
||||
let opts = MarkdownOptions::default();
|
||||
let parser = CommentParser::new_from_language_id("java", opts).unwrap();
|
||||
let _res = parser.parse_str("//{@j");
|
||||
tx.send(()).expect("send failed");
|
||||
});
|
||||
|
||||
rx.recv_timeout(Duration::from_secs(10)).expect("timed out");
|
||||
handle.join().expect("failed to join");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -145,15 +145,21 @@ fn parse_inline_tag(tokens: &[Token]) -> Option<usize> {
|
|||
return None;
|
||||
}
|
||||
|
||||
if tokens.len() <= 3 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut cursor = 3;
|
||||
|
||||
while !matches!(
|
||||
tokens.get(cursor),
|
||||
Some(Token {
|
||||
kind: TokenKind::Punctuation(Punctuation::CloseCurly),
|
||||
..
|
||||
})
|
||||
) {
|
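// Editor's note: bounding the scan by `tokens.len()` is what prevents an
// infinite loop when the closing `}` is missing (see the `hang` regression
// test added in this change).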
||||
while cursor < tokens.len()
|
||||
&& !matches!(
|
||||
tokens.get(cursor),
|
||||
Some(Token {
|
||||
kind: TokenKind::Punctuation(Punctuation::CloseCurly),
|
||||
..
|
||||
})
|
||||
)
|
||||
{
|
||||
cursor += 1;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ create_test!(ignore_shebang_3.sh, 0);
|
|||
create_test!(ignore_shebang_4.sh, 1);
|
||||
create_test!(common.mill, 1);
|
||||
create_test!(basic_kotlin.kt, 0);
|
||||
create_test!(issue_1097.lua, 0);
|
||||
create_test!(basic.clj, 12);
|
||||
|
||||
// Checks that some comments are masked out
|
||||
|
|
|
|||
|
|
@ -1,6 +1,4 @@
|
|||
// *************************************************************************************************
|
||||
// File: RogueScheduler.kt
|
||||
//
|
||||
// A diminutive but fully-formed demonstration of idiomatic Kotlin.
|
||||
//
|
||||
// 1. Defines a sealed algebraic hierarchy to represent the discrete states of a task
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ object hello extends ScalaModule {
|
|||
ivy"com.lihaoyi::mainargs:0.6.2" // for CLI argument parsing
|
||||
)
|
||||
|
||||
// Define an test sub-module using a test framework.
|
||||
// Define an test submodule using a test framework.
|
||||
object test extends ScalaTests {
|
||||
def testFramework = "utest.runner.Framework"
|
||||
def ivyDeps = Agg(
|
||||
|
|
|
|||
|
|
@ -0,0 +1,3 @@
|
|||
---Starting with something capitalized, but without dot at the end
|
||||
---@type table<string, string>
|
||||
local f = {} -- ending with a dot.
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "harper-core"
|
||||
version = "0.71.0"
|
||||
version = "1.3.0"
|
||||
edition = "2024"
|
||||
description = "The language checker for developers."
|
||||
license = "Apache-2.0"
|
||||
|
|
@ -10,7 +10,7 @@ repository = "https://github.com/automattic/harper"
|
|||
[dependencies]
|
||||
blanket = "0.4.0"
|
||||
fst = "0.4.7"
|
||||
hashbrown = { version = "0.16.0", features = ["serde"] }
|
||||
hashbrown = { version = "0.16.1", features = ["serde"] }
|
||||
is-macro = "0.3.6"
|
||||
itertools = "0.14.0"
|
||||
lazy_static = "1.5.0"
|
||||
|
|
@ -22,7 +22,7 @@ serde_json = "1.0.145"
|
|||
smallvec = { version = "1.15.1", features = ["serde"] }
|
||||
thiserror = "2.0.17"
|
||||
unicode-blocks = "0.1.9"
|
||||
unicode-script = "0.5.7"
|
||||
unicode-script = "0.5.8"
|
||||
unicode-width = "0.2.2"
|
||||
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
||||
cached = "0.56.0"
|
||||
|
|
@ -31,12 +31,12 @@ foldhash = "0.2.0"
|
|||
strum_macros = "0.27.2"
|
||||
strum = "0.27.2"
|
||||
ammonia = "4.1.2"
|
||||
harper-brill = { path = "../harper-brill", version = "0.71.0" }
|
||||
harper-brill = { path = "../harper-brill", version = "1.0.0" }
|
||||
bitflags = { version = "2.10.0", features = ["serde"] }
|
||||
trie-rs = "0.4.2"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.7.0", default-features = false }
|
||||
criterion = { version = "0.8.1", default-features = false }
|
||||
rand = "0.8.5"
|
||||
quickcheck = "1.0.3"
|
||||
quickcheck_macros = "1.1.0"
|
||||
|
|
|
|||
|
|
@ -6,6 +6,31 @@
|
|||
Feel free to use `harper-core` in your projects.
|
||||
If you run into issues, create a pull request.
|
||||
|
||||
## Example
|
||||
|
||||
Here's what a full end-to-end linting pipeline could look like using `harper-core`.
|
||||
|
||||
```rust
|
||||
use harper_core::linting::{LintGroup, Linter};
|
||||
use harper_core::parsers::PlainEnglish;
|
||||
use harper_core::spell::FstDictionary;
|
||||
use harper_core::{Dialect, Document};
|
||||
|
||||
let text = "This is an test.";
|
||||
let parser = PlainEnglish;
|
||||
|
||||
let document = Document::new_curated(text, &parser);
|
||||
|
||||
let dict = FstDictionary::curated();
|
||||
let mut linter = LintGroup::new_curated(dict, Dialect::American);
|
||||
|
||||
let lints = linter.lint(&document);
|
||||
|
||||
for lint in lints {
|
||||
println!("{:?}", lint);
|
||||
}
|
||||
```
|
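`Document::new_curated` parses with the bundled curated dictionary; to lint against a custom word list, construct the document with `Document::new` and pass any `Dictionary` implementation.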
||||
|
||||
## Features
|
||||
|
||||
`concurrent`: Whether to use thread-safe primitives (`Arc` vs `Rc`). Disabled by default.
|
||||
|
|
|
|||
|
|
@ -59,7 +59,9 @@
|
|||
"target": [
|
||||
{
|
||||
"metadata": {
|
||||
"adverb": {}
|
||||
"adverb": {
|
||||
"is_manner": true
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
|
|
@ -646,6 +648,14 @@
|
|||
"verb": {}
|
||||
}
|
||||
},
|
||||
"j": {
|
||||
"#": "verb past property",
|
||||
"metadata": {
|
||||
"verb": {
|
||||
"verb_form": "PAST"
|
||||
}
|
||||
}
|
||||
},
|
||||
"J": {
|
||||
"#": "adjective property",
|
||||
"metadata": {
|
||||
|
|
@ -942,6 +952,59 @@
|
|||
"verb_form": "THIRD_PERSON_SINGULAR"
|
||||
}
|
||||
}
|
||||
},
|
||||
"y": {
|
||||
"#": "adverb of manner property",
|
||||
"//": "mnemonic: 'y' looks like 'ly'",
|
||||
"metadata": {
|
||||
"adverb": {
|
||||
"is_manner": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"8": {
|
||||
"#": "adverb of frequency property",
|
||||
"//": "mnemonic: '8' looks like '♾️'",
|
||||
"metadata": {
|
||||
"adverb": {
|
||||
"is_frequency": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"%": {
|
||||
"#": "adverb of degree property",
|
||||
"//": "mnemonic: '%' reminds of '°'",
|
||||
"metadata": {
|
||||
"adverb": {
|
||||
"is_degree": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"♂": {
|
||||
"#": "masculine property",
|
||||
"metadata": {
|
||||
"//": "not yet implemented"
|
||||
}
|
||||
},
|
||||
"♀": {
|
||||
"#": "feminine property",
|
||||
"metadata": {
|
||||
"//": "not yet implemented"
|
||||
}
|
||||
},
|
||||
"ª": {
|
||||
"#": "animate property",
|
||||
"metadata": {
|
||||
"//": "not yet implemented"
|
||||
}
|
||||
},
|
||||
"(": {
|
||||
"#": "prefix property",
|
||||
"metadata": {
|
||||
"affix": {
|
||||
"is_prefix": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large; load the diff to view it.
harper-core/irregular_nouns.json (new file, 162 lines)
|
|
@ -0,0 +1,162 @@
|
|||
[
|
||||
"// comments can appear in the line before an entry",
|
||||
"// or in place of an entry",
|
||||
["child", "children"],
|
||||
["foot", "feet"],
|
||||
["goose", "geese"],
|
||||
["man", "men"],
|
||||
["mouse", "mice"],
|
||||
["ox", "oxen"],
|
||||
["person", "people"],
|
||||
["seraph", "seraphim"],
|
||||
["woman", "women"],
|
||||
["addendum", "addenda"],
|
||||
["aircraft", "aircraft"],
|
||||
["aircraftman", "aircraftmen"],
|
||||
["aircraftwoman", "aircraftwomen"],
|
||||
["airman", "airmen"],
|
||||
["alderman", "aldermen"],
|
||||
["alga", "algae"],
|
||||
["alveolus", "alveoli"],
|
||||
["anchorman", "anchormen"],
|
||||
["anchorwoman", "anchorwomen"],
|
||||
["atrium", "atria"],
|
||||
["axis", "axes"],
|
||||
["bacillus", "bacilli"],
|
||||
["bacterium", "bacteria"],
|
||||
["bandsman", "bandsmen"],
|
||||
["bargeman", "bargemen"],
|
||||
["bellman", "bellmen"],
|
||||
["biceps", "biceps"],
|
||||
["boatman", "boatmen"],
|
||||
["bronchus", "bronchi"],
|
||||
["businesswoman", "businesswomen"],
|
||||
["cactus", "cacti"],
|
||||
["cameraperson", "camerapeople"],
|
||||
["candelabrum", "candelabra"],
|
||||
["catharsis", "catharses"],
|
||||
["chairman", "chairmen"],
|
||||
["chairwoman", "chairwomen"],
|
||||
["churchwoman", "churchwomen"],
|
||||
["clansman", "clansmen"],
|
||||
["clanswoman", "clanswomen"],
|
||||
["committeeman", "committeemen"],
|
||||
["committeewoman", "committeewomen"],
|
||||
["continuum", "continua"],
|
||||
["corpus", "corpora"],
|
||||
["craftsman", "craftsmen"],
|
||||
["craftswoman", "craftswomen"],
|
||||
["crisis", "crises"],
|
||||
["cyclops", "cyclopes"],
|
||||
["datum", "data"],
|
||||
["diaeresis", "diaereses"],
|
||||
["diagnosis", "diagnoses"],
|
||||
["dominatrix", "dominatrices"],
|
||||
["draughtsman", "draughtsmen"],
|
||||
["draughtswoman", "draughtswomen"],
|
||||
["effluvium", "effluvia"],
|
||||
["emphasis", "emphases"],
|
||||
["esophagus", "esophagi"],
|
||||
["extremum", "extrema"],
|
||||
["fish", "fish"],
|
||||
["footman", "footmen"],
|
||||
["formula", "formulae"],
|
||||
["forum", "fora"],
|
||||
["freeman", "freemen"],
|
||||
["frontiersman", "frontiersmen"],
|
||||
["frontierswoman", "frontierswomen"],
|
||||
["garbageman", "garbagemen"],
|
||||
["genesis", "geneses"],
|
||||
["genie", "genii"],
|
||||
["genius", "genii"],
|
||||
["genus", "genera"],
|
||||
["glissando", "glissandi"],
|
||||
["graffito", "graffiti"],
|
||||
["grandchild", "grandchildren"],
|
||||
["handyman", "handymen"],
|
||||
["hitman", "hitmen"],
|
||||
["houseman", "housemen"],
|
||||
["iceman", "icemen"],
|
||||
["ilium", "ilia"],
|
||||
["index", "indices"],
|
||||
["intermezzo", "intermezzi"],
|
||||
["journeyman", "journeymen"],
|
||||
["labium", "labia"],
|
||||
["lamina", "laminae"],
|
||||
["laundrywoman", "laundrywomen"],
|
||||
["laywoman", "laywomen"],
|
||||
["linesman", "linesmen"],
|
||||
["lira", " lire"],
|
||||
["longshoreman", "longshoremen"],
|
||||
["louse", "lice"],
|
||||
["madman", "madmen"],
|
||||
["mailman", "mailmen"],
|
||||
["memorandum", "memoranda"],
|
||||
["metathesis", "metatheses"],
|
||||
["minimum", "minima"],
|
||||
["mitosis", "mitoses"],
|
||||
["motorman", "motormen"],
|
||||
["muscleman", "musclemen"],
|
||||
["nemesis", "nemeses"],
|
||||
["nightwatchman", "nightwatchmen"],
|
||||
["oarsman", "oarsmen"],
|
||||
["oarswoman", "oarswomen"],
|
||||
["oasis", "oases"],
|
||||
["ombudsman", "ombudsmen"],
|
||||
["optimum", "optima"],
|
||||
["palazzo", "palazzi"],
|
||||
["papyrus", "papyri"],
|
||||
["parenthesis", "parentheses"],
|
||||
["patina", "patinae"],
|
||||
["patrolman", "patrolmen"],
|
||||
["pericardium", "pericardia"],
|
||||
["periphrasis", "periphrases"],
|
||||
["pharynx", "pharynges"],
|
||||
["phenomenon", "phenomena"],
|
||||
["plainclothesman", "plainclothesmen"],
|
||||
["pneumococcus", "pneumococci"],
|
||||
["pressman", "pressmen"],
|
||||
["prosthesis", "protheses"],
|
||||
["quantum", "quanta"],
|
||||
["radius", "radii"],
|
||||
["radix", "radices"],
|
||||
["repairman", "repairmen"],
|
||||
["salesman", "salesmen"],
|
||||
["saleswoman", "saleswomen"],
|
||||
["sandman", "sandmen"],
|
||||
["schema", "schemata"],
|
||||
["sheep", "sheep"],
|
||||
["shoreman", "shoremen"],
|
||||
["signore", "signori"],
|
||||
["simulacrum", "simulacra"],
|
||||
["solarium", "solaria"],
|
||||
["spokesman", "spokesmen"],
|
||||
["spokesperson", "spokespeople"],
|
||||
["spokeswoman", "spokeswomen"],
|
||||
["statesman", "statesmen"],
|
||||
["stateswoman", "stateswomen"],
|
||||
["steersman", "steersmen"],
|
||||
["stratum", "strata"],
|
||||
["streptococcus", "streptococci"],
|
||||
["succubus", "succubi"],
|
||||
["symbiosis", "symbioses"],
|
||||
["tarsus", "tarsi"],
|
||||
["taxon", "taxa"],
|
||||
["testatrix", "testatrices"],
|
||||
["testis", "testes"],
|
||||
["thesis", "theses"],
|
||||
["thrombosis", "thromboses"],
|
||||
["tooth", "teeth"],
|
||||
["townsman", "townsmen"],
|
||||
["townswoman", "townswomen"],
|
||||
["tradesman", "tradesmen"],
|
||||
["tradeswoman", "tradeswomen"],
|
||||
["uterus", "uteri"],
|
||||
["vertebra", "vertebrae"],
|
||||
["vertex", "vertices"],
|
||||
["vivarium", "vivaria"],
|
||||
["washerwoman", "washerwomen"],
|
||||
["woodlouse", "woodlice"],
|
||||
["workingwoman", "workingwomen"],
|
||||
["workman", "workmen"]
|
||||
]
|
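A minimal consumption sketch (illustrative; the actual loader is not shown in this diff): entries are two-element `[singular, plural]` arrays, while `"//"`-prefixed strings are comments and are skipped because they are not arrays.

```rust
use serde_json::Value;

fn parse_irregular_nouns(json: &str) -> Vec<(String, String)> {
    let value: Value = serde_json::from_str(json).expect("valid JSON");
    value
        .as_array()
        .into_iter()
        .flatten()
        .filter_map(|entry| {
            // Comment strings fail `as_array` and are filtered out here.
            let pair = entry.as_array()?;
            Some((
                pair.first()?.as_str()?.to_string(),
                pair.get(1)?.as_str()?.to_string(),
            ))
        })
        .collect()
}
```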
||||
harper-core/irregular_verbs.json (new file, 127 lines)
|
|
@ -0,0 +1,127 @@
[
    "// comments can appear in the line before an entry",
    "// or in place of an entry",
    ["arise", "arose", "arisen"],
    ["awake", "awoke", "awoken"],
    "// be/am/are/is -- was/were -- been",
    ["become", "became", "become"],
    ["begin", "began", "begun"],
    ["bend", "bent", "bent"],
    ["bet", "bet", "bet"],
    ["bid", "bade", "bidden"],
    ["bind", "bound", "bound"],
    ["bite", "bit", "bitten"],
    ["bleed", "bled", "bled"],
    ["blow", "blew", "blown"],
    ["break", "broke", "broken"],
    ["breed", "bred", "bred"],
    ["bring", "brought", "brought"],
    ["build", "built", "built"],
    ["burst", "burst", "burst"],
    ["buy", "bought", "bought"],
    ["catch", "caught", "caught"],
    ["choose", "chose", "chosen"],
    ["come", "came", "come"],
    ["cost", "cost", "cost"],
    ["cut", "cut", "cut"],
    ["dive", "dove", "dove"],
    ["do", "did", "done"],
    ["drink", "drank", "drunk"],
    ["drive", "drove", "driven"],
    ["eat", "ate", "eaten"],
    ["fall", "fell", "fallen"],
    ["feed", "fed", "fed"],
    ["feel", "felt", "felt"],
    ["fight", "fought", "fought"],
    ["find", "found", "found"],
    ["fly", "flew", "flown"],
    ["forget", "forgot", "forgotten"],
    ["forgo", "forwent", "forgone"],
    ["freeze", "froze", "frozen"],
    "// get -- got -- gotten",
    ["get", "got", "got"],
    ["give", "gave", "given"],
    ["go", "went", "gone"],
    ["grow", "grew", "grown"],
    ["have", "had", "had"],
    ["hear", "heard", "heard"],
    ["hit", "hit", "hit"],
    ["hold", "held", "held"],
    ["hurt", "hurt", "hurt"],
    ["input", "input", "input"],
    ["keep", "kept", "kept"],
    ["know", "knew", "known"],
    ["lay", "laid", "lain"],
    ["lead", "led", "led"],
    ["light", "lit", "lit"],
    ["lose", "lost", "lost"],
    ["make", "made", "made"],
    ["mistake", "mistook", "mistaken"],
    ["output", "output", "output"],
    ["overtake", "overtook", "overtaken"],
    ["overthrow", "overthrew", "overthrown"],
    ["overwrite", "overwrote", "overwritten"],
    ["partake", "partook", "partaken"],
    ["pay", "paid", "paid"],
    ["put", "put", "put"],
    ["read", "read", "read"],
    ["redo", "redid", "redone"],
    ["remake", "remade", "remade"],
    ["reread", "reread", "reread"],
    ["reset", "reset", "reset"],
    ["ride", "rode", "ridden"],
    ["ring", "rang", "rung"],
    ["rise", "rose", "risen"],
    ["run", "ran", "run"],
    ["see", "saw", "seen"],
    ["sell", "sold", "sold"],
    ["send", "sent", "sent"],
    ["set", "set", "set"],
    ["shake", "shook", "shaken"],
    ["shed", "shed", "shed"],
    ["shine", "shone", "shone"],
    ["shoe", "shod", "shod"],
    ["shoot", "shot", "shot"],
    ["show", "showed", "shown"],
    ["shrink", "shrank", "shrunk"],
    ["shut", "shut", "shut"],
    ["sing", "sang", "sung"],
    "// sink -- sank -- sunken??",
    ["sink", "sank", "sunk"],
    ["sit", "sat", "sat"],
    ["slay", "slew", "slain"],
    ["sleep", "slept", "slept"],
    ["slide", "slid", "slid"],
    ["slit", "slit", "slit"],
    "// sneak -- sneaked/snuck -- sneaked/snuck",
    ["speak", "spoke", "spoken"],
    ["spin", "spun", "spun"],
    ["spit", "spat", "spat"],
    ["split", "split", "split"],
    ["spread", "spread", "spread"],
    ["spring", "sprang", "sprung"],
    ["stand", "stood", "stood"],
    ["steal", "stole", "stolen"],
    ["stick", "stuck", "stuck"],
    ["sting", "stung", "stung"],
    ["stink", "stank", "stunk"],
    ["stride", "strode", "stridden"],
    ["strike", "struck", "stricken"],
    ["string", "strung", "strung"],
    ["sew", "sewed", "sewn"],
    ["swear", "swore", "sworn"],
    ["swim", "swam", "swum"],
    ["swing", "swung", "swung"],
    ["take", "took", "taken"],
    ["teach", "taught", "taught"],
    ["tear", "tore", "torn"],
    ["think", "thought", "thought"],
    ["throw", "threw", "thrown"],
    ["tread", "trod", "trodden"],
    ["undo", "undid", "undone"],
    ["wake", "woke", "woken"],
    ["wear", "wore", "worn"],
    ["weave", "wove", "woven"],
    ["wind", "wound", "wound"],
    ["write", "wrote", "written"]
]
@ -3,7 +3,13 @@ use unicode_width::UnicodeWidthChar;

use crate::Punctuation;

pub trait CharExt {
mod private {
    pub trait Sealed {}

    impl Sealed for char {}
}

pub trait CharExt: private::Sealed {
    fn is_cjk(&self) -> bool;
    /// Whether a character can be a component of an English word.
    fn is_english_lingual(&self) -> bool;
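For readers unfamiliar with the pattern this hunk introduces: the `Sealed` supertrait lives in a private module, so downstream crates can call the extension methods but can never implement the trait for their own types. A minimal, standalone sketch of the idea (the `is_ascii_vowel` method is illustrative only, not part of Harper's API):

```rust
// Sealed-trait pattern: `private::Sealed` is unnameable outside this crate,
// so only the implementations provided here can ever exist.
mod private {
    pub trait Sealed {}
    impl Sealed for char {}
}

pub trait CharExt: private::Sealed {
    fn is_ascii_vowel(&self) -> bool;
}

impl CharExt for char {
    fn is_ascii_vowel(&self) -> bool {
        matches!(self.to_ascii_lowercase(), 'a' | 'e' | 'i' | 'o' | 'u')
    }
}

fn main() {
    assert!('E'.is_ascii_vowel());
    assert!(!'x'.is_ascii_vowel());
}
```

The payoff is that methods can later be added, renamed, or given default bodies without that being a breaking change for external implementors, because there are none.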
@ -7,8 +7,14 @@ use smallvec::SmallVec;
/// Most English words are fewer than 12 characters.
pub type CharString = SmallVec<[char; 16]>;

mod private {
    pub trait Sealed {}

    impl Sealed for [char] {}
}

/// Extensions to character sequences that make them easier to wrangle.
pub trait CharStringExt {
pub trait CharStringExt: private::Sealed {
    /// Convert all characters to lowercase, returning a new owned vector if any changes were made.
    fn to_lower(&'_ self) -> Cow<'_, [char]>;
@ -26,10 +32,22 @@ pub trait CharStringExt {
    /// Only normalizes the left side to lowercase and avoids allocations.
    fn eq_ignore_ascii_case_str(&self, other: &str) -> bool;

    /// Case-insensitive comparison with any of a list of string slices, assuming the right-hand side is lowercase ASCII.
    /// Only normalizes the left side to lowercase and avoids allocations.
    fn eq_any_ignore_ascii_case_str(&self, others: &[&str]) -> bool;

    /// Case-insensitive comparison with any of a list of character slices, assuming the right-hand side is lowercase ASCII.
    /// Only normalizes the left side to lowercase and avoids allocations.
    fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool;

    /// Case-insensitive check if the string starts with the given ASCII prefix.
    /// The prefix is assumed to be lowercase.
    fn starts_with_ignore_ascii_case_str(&self, prefix: &str) -> bool;

    /// Case-insensitive check if the string starts with any of the given ASCII prefixes.
    /// The prefixes are assumed to be lowercase.
    fn starts_with_any_ignore_ascii_case_str(&self, prefixes: &[&str]) -> bool;

    /// Case-insensitive check if the string ends with the given ASCII suffix.
    /// The suffix is assumed to be lowercase.
    fn ends_with_ignore_ascii_case_chars(&self, suffix: &[char]) -> bool;
@ -37,6 +55,13 @@ pub trait CharStringExt {
    /// Case-insensitive check if the string ends with the given ASCII suffix.
    /// The suffix is assumed to be lowercase.
    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool;

    /// Case-insensitive check if the string ends with any of the given ASCII suffixes.
    /// The suffixes are assumed to be lowercase.
    fn ends_with_any_ignore_ascii_case_chars(&self, suffixes: &[&[char]]) -> bool;

    /// Check if the string contains any vowels
    fn contains_vowel(&self) -> bool;
}

impl CharStringExt for [char] {
@ -88,12 +113,33 @@ impl CharStringExt for [char] {
            .all(|(a, b)| a.to_ascii_lowercase() == *b)
    }

    fn eq_any_ignore_ascii_case_str(&self, others: &[&str]) -> bool {
        others.iter().any(|str| self.eq_ignore_ascii_case_str(str))
    }

    fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool {
        others
            .iter()
            .any(|chars| self.eq_ignore_ascii_case_chars(chars))
    }

    fn starts_with_ignore_ascii_case_str(&self, prefix: &str) -> bool {
        let prefix_len = prefix.len();
        if self.len() < prefix_len {
            return false;
        }
        self.iter()
            .take(prefix_len)
            .zip(prefix.chars())
            .all(|(a, b)| a.to_ascii_lowercase() == b)
    }

    fn starts_with_any_ignore_ascii_case_str(&self, prefixes: &[&str]) -> bool {
        prefixes
            .iter()
            .any(|prefix| self.starts_with_ignore_ascii_case_str(prefix))
    }

    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool {
        let suffix_len = suffix.len();
        if self.len() < suffix_len {
@ -119,6 +165,16 @@ impl CharStringExt for [char] {
            .zip(suffix.iter())
            .all(|(a, b)| a.to_ascii_lowercase() == *b)
    }

    fn ends_with_any_ignore_ascii_case_chars(&self, suffixes: &[&[char]]) -> bool {
        suffixes
            .iter()
            .any(|suffix| self.ends_with_ignore_ascii_case_chars(suffix))
    }

    fn contains_vowel(&self) -> bool {
        self.iter().any(|c| c.is_vowel())
    }
}

macro_rules! char_string {
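A standalone sketch of the comparison strategy all of these implementations share: only the left-hand side is lowercased on the fly, character by character, so no allocation is needed. The free-function names and `main` harness below are illustrative, not Harper's API surface, and they assume a lowercase-ASCII right-hand side just as the trait docs do:

```rust
// Compare a char slice against a lowercase ASCII &str without allocating.
fn eq_ignore_ascii_case_str(chars: &[char], other: &str) -> bool {
    chars.len() == other.len()
        && chars
            .iter()
            .zip(other.chars())
            .all(|(a, b)| a.to_ascii_lowercase() == b)
}

// Suffix check with the same no-allocation, left-side-only normalization.
fn ends_with_ignore_ascii_case_str(chars: &[char], suffix: &str) -> bool {
    let n = suffix.len();
    chars.len() >= n
        && chars[chars.len() - n..]
            .iter()
            .zip(suffix.chars())
            .all(|(a, b)| a.to_ascii_lowercase() == b)
}

fn main() {
    let word: Vec<char> = "Running".chars().collect();
    assert!(eq_ignore_ascii_case_str(&word, "running"));
    assert!(ends_with_ignore_ascii_case_str(&word, "ing"));
}
```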
@ -18,12 +18,20 @@ use crate::{Document, TokenKind, TokenStringExt};
/// having their own lexeme, but "Ivy" and "ivy" sharing the same lexeme.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)]
pub struct DictWordMetadata {
    /// The main parts of speech which have extra data.
    pub noun: Option<NounData>,
    pub pronoun: Option<PronounData>,
    pub verb: Option<VerbData>,
    pub adjective: Option<AdjectiveData>,
    pub adverb: Option<AdverbData>,
    pub conjunction: Option<ConjunctionData>,
    pub determiner: Option<DeterminerData>,
    pub affix: Option<AffixData>,
    /// Parts of speech which don't have extra data.
    /// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
    #[serde(default = "default_false")]
    pub preposition: bool,
    /// Whether the word is an offensive word.
    pub swear: Option<bool>,
    /// The dialects this word belongs to.
    /// If no dialects are defined, it can be assumed that the word is
@ -33,19 +41,17 @@ pub struct DictWordMetadata {
    /// Orthographic information: letter case, spaces, hyphens, etc.
    #[serde(default = "OrthFlags::empty")]
    pub orth_info: OrthFlags,
    /// Whether the word is a [determiner](https://en.wikipedia.org/wiki/English_determiners).
    pub determiner: Option<DeterminerData>,
    /// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
    #[serde(default = "default_false")]
    pub preposition: bool,
    /// Whether the word is considered especially common.
    #[serde(default = "default_false")]
    pub common: bool,
    #[serde(default = "default_none")]
    pub derived_from: Option<WordId>,
    /// Generated by a chunker
    /// Generated by a chunker. Declares whether the word is a member of a nominal phrase. Using
    /// this should be preferred over the similarly named `Pattern`.
    ///
    /// For more details, see [the announcement blog post](https://elijahpotter.dev/articles/training_a_chunker_with_burn).
    pub np_member: Option<bool>,
    /// Generated by a POS tagger
    /// Generated by a POS tagger. Declares what it inferred the word's part of speech to be.
    pub pos_tag: Option<UPOS>,
}
@ -186,11 +192,12 @@ impl DictWordMetadata {
            adjective: merge!(self.adjective, other.adjective),
            adverb: merge!(self.adverb, other.adverb),
            conjunction: merge!(self.conjunction, other.conjunction),
            determiner: merge!(self.determiner, other.determiner),
            affix: merge!(self.affix, other.affix),
            preposition: self.preposition || other.preposition,
            dialects: self.dialects | other.dialects,
            orth_info: self.orth_info | other.orth_info,
            swear: self.swear.or(other.swear),
            determiner: merge!(self.determiner, other.determiner),
            preposition: self.preposition || other.preposition,
            common: self.common || other.common,
            derived_from: self.derived_from.or(other.derived_from),
            pos_tag: self.pos_tag.or(other.pos_tag),
@ -231,6 +238,7 @@ impl DictWordMetadata {
                self.adverb = None;
                self.conjunction = None;
                self.determiner = None;
                self.affix = None;
                self.preposition = false;
            }
            PROPN => {

@ -256,6 +264,7 @@ impl DictWordMetadata {
                self.adverb = None;
                self.conjunction = None;
                self.determiner = None;
                self.affix = None;
                self.preposition = false;
            }
            PRON => {

@ -269,6 +278,7 @@ impl DictWordMetadata {
                self.adverb = None;
                self.conjunction = None;
                self.determiner = None;
                self.affix = None;
                self.preposition = false;
            }
            VERB => {

@ -290,6 +300,7 @@ impl DictWordMetadata {
                self.adverb = None;
                self.conjunction = None;
                self.determiner = None;
                self.affix = None;
                self.preposition = false;
            }
            AUX => {

@ -311,6 +322,7 @@ impl DictWordMetadata {
                self.adverb = None;
                self.conjunction = None;
                self.determiner = None;
                self.affix = None;
                self.preposition = false;
            }
            ADJ => {

@ -324,6 +336,7 @@ impl DictWordMetadata {
                self.adverb = None;
                self.conjunction = None;
                self.determiner = None;
                self.affix = None;
                self.preposition = false;
            }
            ADV => {

@ -337,6 +350,7 @@ impl DictWordMetadata {
                self.adjective = None;
                self.conjunction = None;
                self.determiner = None;
                self.affix = None;
                self.preposition = false;
            }
            ADP => {

@ -347,6 +361,7 @@ impl DictWordMetadata {
                self.adverb = None;
                self.conjunction = None;
                self.determiner = None;
                self.affix = None;
                self.preposition = true;
            }
            DET => {

@ -356,6 +371,7 @@ impl DictWordMetadata {
                self.adjective = None;
                self.adverb = None;
                self.conjunction = None;
                self.affix = None;
                self.preposition = false;
                self.determiner = Some(DeterminerData::default());
            }

@ -370,6 +386,7 @@ impl DictWordMetadata {
                self.adjective = None;
                self.adverb = None;
                self.determiner = None;
                self.affix = None;
                self.preposition = false;
            }
            _ => {}
@ -384,7 +401,7 @@ impl DictWordMetadata {
        verb has linking, auxiliary.
        conjunction has.
        adjective has.
        adverb has
        adverb has manner, frequency, degree
    );

    // Manual metadata queries
@ -643,7 +660,7 @@ impl DictWordMetadata {

    // Checks if the word is definitely a determiner and more specifically is labeled as (a) quantifier.
    pub fn is_quantifier(&self) -> bool {
        self.determiner.is_some()
        self.is_quantifier_determiner()
    }

    // Non-POS queries
@ -928,12 +945,20 @@ impl AdjectiveData {
/// The typical adverbs are "adverbs of manner", those derived from adjectives in -ly
/// other adverbs (time, place, etc) should probably not be considered adverbs for Harper's purposes
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AdverbData {}
pub struct AdverbData {
    pub is_manner: Option<bool>,
    pub is_frequency: Option<bool>,
    pub is_degree: Option<bool>,
}

impl AdverbData {
    /// Produce a copy of `self` with the known properties of `other` set.
    pub fn or(&self, _other: &Self) -> Self {
        Self {}
        Self {
            is_manner: self.is_manner.or(_other.is_manner),
            is_frequency: self.is_frequency.or(_other.is_frequency),
            is_degree: self.is_degree.or(_other.is_degree),
        }
    }
}
@ -947,6 +972,22 @@ impl ConjunctionData {
    }
}

#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AffixData {
    pub is_prefix: Option<bool>,
    pub is_suffix: Option<bool>,
}

impl AffixData {
    /// Produce a copy of `self` with the known properties of `other` set.
    pub fn or(&self, _other: &Self) -> Self {
        Self {
            is_prefix: self.is_prefix.or(_other.is_prefix),
            is_suffix: self.is_suffix.or(_other.is_suffix),
        }
    }
}

/// A regional dialect.
///
/// Note: these have bit-shifted values so that they can ergonomically integrate with
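Both `AdverbData::or` and `AffixData::or` rely on the same `Option::or` semantics: a known (`Some`) property on `self` wins, and only the gaps are filled in from `other`. A minimal, standalone sketch mirroring `AffixData` (for illustration only):

```rust
#[derive(Debug, Clone, Copy, PartialEq)]
struct AffixData {
    is_prefix: Option<bool>,
    is_suffix: Option<bool>,
}

impl AffixData {
    // Field-wise merge: `self`'s known values take precedence.
    fn or(&self, other: &Self) -> Self {
        Self {
            is_prefix: self.is_prefix.or(other.is_prefix),
            is_suffix: self.is_suffix.or(other.is_suffix),
        }
    }
}

fn main() {
    let a = AffixData { is_prefix: Some(true), is_suffix: None };
    let b = AffixData { is_prefix: Some(false), is_suffix: Some(true) };
    // `a` already knows `is_prefix`, so `b`'s value does not override it.
    let merged = a.or(&b);
    assert_eq!(merged, AffixData { is_prefix: Some(true), is_suffix: Some(true) });
}
```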
@ -1821,6 +1862,16 @@ pub mod tests {
        assert!(md("your").is_possessive_determiner());
    }

    #[test]
    fn every_is_quantifier() {
        assert!(md("every").is_quantifier());
    }

    #[test]
    fn the_isnt_quantifier() {
        assert!(!md("the").is_quantifier());
    }

    #[test]
    fn equipment_is_mass_noun() {
        assert!(md("equipment").is_mass_noun());
@ -918,6 +918,7 @@ impl TokenStringExt for Document {
    create_fns_on_doc!(verb);
    create_fns_on_doc!(word);
    create_fns_on_doc!(word_like);
    create_fns_on_doc!(heading_start);

    fn first_sentence_word(&self) -> Option<&Token> {
        self.tokens.first_sentence_word()
@ -947,6 +948,10 @@ impl TokenStringExt for Document {
        self.tokens.iter_paragraphs()
    }

    fn iter_headings(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
        self.tokens.iter_headings()
    }

    fn iter_sentences(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
        self.tokens.iter_sentences()
    }
@ -42,17 +42,13 @@ impl FixedPhrase {
                    phrase = phrase.then_whitespace();
                }
                TokenKind::Punctuation(p) => {
                    phrase = phrase.then(move |t: &Token, _source: &[char]| {
                        t.kind.as_punctuation().cloned() == Some(p)
                    })
                    phrase = phrase
                        .then_kind_where(move |kind| kind.as_punctuation().cloned() == Some(p));
                }
                TokenKind::ParagraphBreak => {
                    phrase = phrase.then_whitespace();
                }
                TokenKind::Number(n) => {
                    phrase = phrase
                        .then(move |tok: &Token, _source: &[char]| tok.kind == TokenKind::Number(n))
                }
                TokenKind::Number(_) => phrase = phrase.then_kind_where(|kind| kind.is_number()),
                _ => panic!("Fell out of expected document formats."),
            }
        }
@ -154,6 +154,8 @@ where

pub trait OwnedExprExt {
    fn or(self, other: impl Expr + 'static) -> FirstMatchOf;
    fn and(self, other: impl Expr + 'static) -> All;
    fn and_not(self, other: impl Expr + 'static) -> All;
    fn or_longest(self, other: impl Expr + 'static) -> LongestMatchOf;
}
@ -166,6 +168,16 @@ where
        FirstMatchOf::new(vec![Box::new(self), Box::new(other)])
    }

    /// Returns an expression that matches only if both the current one and the expression contained in `other` do.
    fn and(self, other: impl Expr + 'static) -> All {
        All::new(vec![Box::new(self), Box::new(other)])
    }

    /// Returns an expression that matches only if the current one matches and the expression contained in `other` does not.
    fn and_not(self, other: impl Expr + 'static) -> All {
        self.and(UnlessStep::new(other, |_tok: &Token, _src: &[char]| true))
    }

    /// Returns an expression that matches the longest of the current one or the expression contained in `other`.
    ///
    /// If you don't need the longest match, prefer using the short-circuiting [`Self::or()`] instead.
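A rough sketch of the `or`/`and`/`and_not` semantics documented above, modeled with plain closures over `&str` rather than Harper's `Expr`/`Token` machinery (all names below are illustrative):

```rust
type Pred = Box<dyn Fn(&str) -> bool>;

fn or(a: Pred, b: Pred) -> Pred {
    Box::new(move |s| a(s) || b(s)) // FirstMatchOf: the first match wins
}

fn and(a: Pred, b: Pred) -> Pred {
    Box::new(move |s| a(s) && b(s)) // All: every sub-expression must match
}

fn and_not(a: Pred, b: Pred) -> Pred {
    and(a, Box::new(move |s| !b(s))) // All + Unless: match `a` unless `b` also matches
}

fn main() {
    let is_short: Pred = Box::new(|s: &str| s.len() <= 4);
    let is_upper: Pred = Box::new(|s: &str| s.chars().all(|c| c.is_uppercase()));

    let rule = and_not(is_short, is_upper);
    assert!(rule("word"));   // short and not all-caps
    assert!(!rule("WORD"));  // vetoed by the exclusion
}
```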
@ -19,8 +19,8 @@ macro_rules! gen_then_from_is {
        paste! {
            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_$quality >] (self) -> Self{
                self.then(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                self.then_kind_where(|kind| {
                    kind.[< is_$quality >]()
                })
            }
@ -40,12 +40,8 @@ macro_rules! gen_then_from_is {

            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns false.")]
            pub fn [< then_anything_but_$quality >] (self) -> Self{
                self.then(|tok: &Token, _source: &[char]| {
                    if tok.kind.[< is_$quality >](){
                        false
                    }else{
                        true
                    }
                self.then_kind_where(|kind| {
                    !kind.[< is_$quality >]()
                })
            }
        }
@ -84,6 +80,13 @@ impl Expr for SequenceExpr {
impl SequenceExpr {
    // Constructor methods

    // Single token methods

    /// Construct a new sequence with an [`AnyPattern`] at the beginning of the operation list.
    pub fn anything() -> Self {
        Self::default().then_anything()
    }

    // Single word token methods

    /// Construct a new sequence with a [`Word`] at the beginning of the operation list.
@ -170,9 +173,9 @@ impl SequenceExpr {
        self.then(WordSet::new(words))
    }

    /// Matches any token whose `Kind` exactly matches.
    pub fn then_strict(self, kind: TokenKind) -> Self {
        self.then(move |tok: &Token, _source: &[char]| tok.kind == kind)
    /// Shorthand for [`Self::then_word_set`].
    pub fn t_set(self, words: &'static [&'static str]) -> Self {
        self.then_word_set(words)
    }

    /// Match against one or more whitespace tokens.
@ -180,6 +183,11 @@ impl SequenceExpr {
        self.then(WhitespacePattern)
    }

    /// Shorthand for [`Self::then_whitespace`].
    pub fn t_ws(self) -> Self {
        self.then_whitespace()
    }

    /// Match against one or more whitespace tokens.
    pub fn then_whitespace_or_hyphen(self) -> Self {
        self.then(WhitespacePattern.or(|tok: &Token, _: &[char]| tok.kind.is_hyphen()))
@ -190,11 +198,6 @@ impl SequenceExpr {
        self.then_whitespace_or_hyphen()
    }

    /// Shorthand for [`Self::then_whitespace`].
    pub fn t_ws(self) -> Self {
        self.then_whitespace()
    }

    pub fn then_one_or_more(self, expr: impl Expr + 'static) -> Self {
        self.then(Repeating::new(Box::new(expr), 1))
    }
@ -229,7 +232,7 @@ impl SequenceExpr {

    /// Matches any word.
    pub fn then_any_word(self) -> Self {
        self.then(|tok: &Token, _source: &[char]| tok.kind.is_word())
        self.then_kind_where(|kind| kind.is_word())
    }

    /// Match examples of `word` that have any capitalization.
@ -266,6 +269,23 @@ impl SequenceExpr {

    // One kind

    /// Matches any token whose `Kind` exactly matches.
    pub fn then_kind(self, kind: TokenKind) -> Self {
        self.then_kind_where(move |k| kind == *k)
    }

    /// Matches a token where the provided closure returns true for the token's kind.
    pub fn then_kind_where<F>(mut self, predicate: F) -> Self
    where
        F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
    {
        self.exprs
            .push(Box::new(move |tok: &Token, _source: &[char]| {
                predicate(&tok.kind)
            }));
        self
    }

    /// Match a token of a given kind which is not in the list of words.
    pub fn then_kind_except<F>(self, pred_is: F, ex: &'static [&'static str]) -> Self
    where
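A sketch of why `then_kind_where` tightens call sites: instead of a closure over the whole token plus source text, callers pass a predicate over the kind alone. The types below are simplified stand-ins, not Harper's exact definitions:

```rust
#[derive(PartialEq)]
enum TokenKind {
    Word,
    Number,
    Punctuation,
}

struct SequenceExpr {
    // Each step is just a predicate over the token's kind.
    steps: Vec<Box<dyn Fn(&TokenKind) -> bool>>,
}

impl SequenceExpr {
    fn then_kind_where<F>(mut self, predicate: F) -> Self
    where
        F: Fn(&TokenKind) -> bool + 'static,
    {
        self.steps.push(Box::new(predicate));
        self
    }

    // `then_kind` is now a one-liner built on `then_kind_where`.
    fn then_kind(self, kind: TokenKind) -> Self {
        self.then_kind_where(move |k| *k == kind)
    }
}

fn main() {
    let expr = SequenceExpr { steps: Vec::new() }
        .then_kind(TokenKind::Word)
        .then_kind_where(|k| matches!(*k, TokenKind::Number | TokenKind::Punctuation));
    assert_eq!(expr.steps.len(), 2);
}
```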
@ -288,17 +308,27 @@ impl SequenceExpr {
        F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
        F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
    {
        self.then(move |tok: &Token, _source: &[char]| pred_is_1(&tok.kind) && pred_is_2(&tok.kind))
        self.then_kind_where(move |k| pred_is_1(k) && pred_is_2(k))
    }

    /// Match a token where either of the two token kind predicates returns true.
    /// For instance, an adjetive or an adverb.
    /// For instance, an adjective or an adverb.
    pub fn then_kind_either<F1, F2>(self, pred_is_1: F1, pred_is_2: F2) -> Self
    where
        F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
        F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
    {
        self.then(move |tok: &Token, _source: &[char]| pred_is_1(&tok.kind) || pred_is_2(&tok.kind))
        self.then_kind_where(move |k| pred_is_1(k) || pred_is_2(k))
    }

    /// Match a token where neither of the two token kind predicates returns true.
    /// For instance, a word that can't be a verb or a noun.
    pub fn then_kind_neither<F1, F2>(self, pred_isnt_1: F1, pred_isnt_2: F2) -> Self
    where
        F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
        F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
    {
        self.then_kind_where(move |k| !pred_isnt_1(k) && !pred_isnt_2(k))
    }

    /// Match a token where the first token kind predicate returns true and the second returns false.
@ -308,7 +338,7 @@ impl SequenceExpr {
        F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
        F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
    {
        self.then(move |tok: &Token, _source: &[char]| pred_is(&tok.kind) && !pred_not(&tok.kind))
        self.then_kind_where(move |k| pred_is(k) && !pred_not(k))
    }

    /// Match a token where the first token kind predicate returns true and the second returns false,
@ -332,6 +362,42 @@ impl SequenceExpr {
        })
    }

    /// Match a token where the first token kind predicate returns true and all of the second return false.
    /// For instance, a word that can be a verb but not a noun or an adjective.
    pub fn then_kind_is_but_isnt_any_of<F1, F2>(
        self,
        pred_is: F1,
        preds_isnt: &'static [F2],
    ) -> Self
    where
        F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
        F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
    {
        self.then_kind_where(move |k| pred_is(k) && !preds_isnt.iter().any(|pred| pred(k)))
    }

    /// Match a token where the first token kind predicate returns true and all of the second return false,
    /// and the token is not in the list of exceptions.
    /// For instance, an adjective that isn't also a verb or adverb or the word "likely".
    pub fn then_kind_is_but_isnt_any_of_except<F1, F2>(
        self,
        pred_is: F1,
        preds_isnt: &'static [F2],
        ex: &'static [&'static str],
    ) -> Self
    where
        F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
        F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
    {
        self.then(move |tok: &Token, src: &[char]| {
            pred_is(&tok.kind)
                && !preds_isnt.iter().any(|pred| pred(&tok.kind))
                && !ex
                    .iter()
                    .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
        })
    }

    gen_then_from_is!(sentence_terminator);
    // More than two kinds
@ -341,7 +407,16 @@ impl SequenceExpr {
    where
        F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
    {
        self.then(move |tok: &Token, _source: &[char]| preds_is.iter().any(|pred| pred(&tok.kind)))
        self.then_kind_where(move |k| preds_is.iter().any(|pred| pred(k)))
    }

    /// Match a token where none of the token kind predicates returns true.
    /// Like `then_kind_neither` but for more than two predicates.
    pub fn then_kind_none_of<F>(self, preds_isnt: &'static [F]) -> Self
    where
        F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
    {
        self.then_kind_where(move |k| preds_isnt.iter().all(|pred| !pred(k)))
    }

    /// Match a token where any of the token kind predicates returns true,
@ -374,7 +449,6 @@ impl SequenceExpr {
    {
        self.then(move |tok: &Token, src: &[char]| {
            preds.iter().any(|pred| pred(&tok.kind))
            // && !words
                || words
                    .iter()
                    .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
@ -421,6 +495,7 @@ impl SequenceExpr {

    gen_then_from_is!(noun);
    gen_then_from_is!(proper_noun);
    gen_then_from_is!(plural_noun);
    gen_then_from_is!(mass_noun_only);

    // Pronouns
@ -444,6 +519,7 @@ impl SequenceExpr {
    gen_then_from_is!(verb_lemma);
    gen_then_from_is!(verb_simple_past_form);
    gen_then_from_is!(verb_past_participle_form);
    gen_then_from_is!(verb_progressive_form);

    // Adjectives
@ -455,6 +531,7 @@ impl SequenceExpr {
    // Adverbs

    gen_then_from_is!(adverb);
    gen_then_from_is!(frequency_adverb);

    // Determiners
@ -463,6 +540,7 @@ impl SequenceExpr {
    gen_then_from_is!(possessive_determiner);
    gen_then_from_is!(quantifier);
    gen_then_from_is!(non_quantifier_determiner);
    gen_then_from_is!(non_demonstrative_determiner);

    /// Push an [`IndefiniteArticle`] to the end of the operation list.
    pub fn then_indefinite_article(self) -> Self {
@ -474,6 +552,12 @@ impl SequenceExpr {
    gen_then_from_is!(conjunction);
    gen_then_from_is!(preposition);

    // Numbers

    gen_then_from_is!(number);
    gen_then_from_is!(cardinal_number);
    gen_then_from_is!(ordinal_number);

    // Punctuation

    gen_then_from_is!(punctuation);
@ -486,7 +570,6 @@ impl SequenceExpr {

    // Other

    gen_then_from_is!(number);
    gen_then_from_is!(case_separator);
    gen_then_from_is!(likely_homograph);
}
121
harper-core/src/irregular_nouns.rs
Normal file
@ -0,0 +1,121 @@
use lazy_static::lazy_static;
use serde::Deserialize;
use std::sync::Arc;

type Noun = (String, String);

#[derive(Debug, Deserialize)]
pub struct IrregularNouns {
    nouns: Vec<Noun>,
}

/// The uncached function that is used to produce the original copy of the
/// irregular noun table.
fn uncached_inner_new() -> Arc<IrregularNouns> {
    IrregularNouns::from_json_file(include_str!("../irregular_nouns.json"))
        .map(Arc::new)
        .unwrap_or_else(|e| panic!("Failed to load irregular noun table: {}", e))
}

lazy_static! {
    static ref NOUNS: Arc<IrregularNouns> = uncached_inner_new();
}

impl IrregularNouns {
    pub fn new() -> Self {
        Self { nouns: vec![] }
    }

    pub fn from_json_file(json: &str) -> Result<Self, serde_json::Error> {
        // Deserialize into Vec<serde_json::Value> to handle mixed types
        let values: Vec<serde_json::Value> =
            serde_json::from_str(json).expect("Failed to parse irregular nouns JSON");

        let mut nouns = Vec::new();

        for value in values {
            match value {
                serde_json::Value::Array(arr) if arr.len() == 2 => {
                    // Handle array of 2 strings
                    if let (Some(singular), Some(plural)) = (arr[0].as_str(), arr[1].as_str()) {
                        nouns.push((singular.to_string(), plural.to_string()));
                    }
                }
                // Strings are used for comments to guide contributors editing the file
                serde_json::Value::String(_) => {}
                _ => {}
            }
        }

        Ok(Self { nouns })
    }

    pub fn curated() -> Arc<Self> {
        (*NOUNS).clone()
    }

    pub fn get_plural_for_singular(&self, singular: &str) -> Option<&str> {
        self.nouns
            .iter()
            .find(|(sg, _)| sg.eq_ignore_ascii_case(singular))
            .map(|(_, pl)| pl.as_str())
    }

    pub fn get_singular_for_plural(&self, plural: &str) -> Option<&str> {
        self.nouns
            .iter()
            .find(|(_, pl)| pl.eq_ignore_ascii_case(plural))
            .map(|(sg, _)| sg.as_str())
    }
}

impl Default for IrregularNouns {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn can_find_irregular_plural_for_singular_lowercase() {
        assert_eq!(
            IrregularNouns::curated().get_plural_for_singular("man"),
            Some("men")
        );
    }

    #[test]
    fn can_find_irregular_plural_for_singular_uppercase() {
        assert_eq!(
            IrregularNouns::curated().get_plural_for_singular("WOMAN"),
            Some("women")
        );
    }

    #[test]
    fn can_find_singular_for_irregular_plural() {
        assert_eq!(
            IrregularNouns::curated().get_singular_for_plural("children"),
            Some("child")
        );
    }

    #[test]
    fn cant_find_regular_plural() {
        assert_eq!(
            IrregularNouns::curated().get_plural_for_singular("car"),
            None
        );
    }

    #[test]
    fn cant_find_non_noun() {
        assert_eq!(
            IrregularNouns::curated().get_plural_for_singular("the"),
            None
        );
    }
}
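Assuming the `IrregularNouns` type above is in scope, a quick sketch of the comment-tolerant JSON convention it parses: bare strings are skipped as contributor comments, and only two-element arrays become (singular, plural) pairs. Lookups are case-insensitive on the query side:

```rust
fn main() -> Result<(), serde_json::Error> {
    // A tiny table in the same format as harper-core/irregular_nouns.json.
    let json = r#"[
        "// comments can appear in place of an entry",
        ["goose", "geese"]
    ]"#;

    let nouns = IrregularNouns::from_json_file(json)?;
    assert_eq!(nouns.get_plural_for_singular("goose"), Some("geese"));
    assert_eq!(nouns.get_singular_for_plural("GEESE"), Some("goose"));
    Ok(())
}
```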
120
harper-core/src/irregular_verbs.rs
Normal file
@ -0,0 +1,120 @@
use lazy_static::lazy_static;
use serde::Deserialize;
use std::sync::Arc;

type Verb = (String, String, String);

#[derive(Debug, Deserialize)]
pub struct IrregularVerbs {
    verbs: Vec<Verb>,
}

/// The uncached function that is used to produce the original copy of the
/// irregular verb table.
fn uncached_inner_new() -> Arc<IrregularVerbs> {
    IrregularVerbs::from_json_file(include_str!("../irregular_verbs.json"))
        .map(Arc::new)
        .unwrap_or_else(|e| panic!("Failed to load irregular verb table: {}", e))
}

lazy_static! {
    static ref VERBS: Arc<IrregularVerbs> = uncached_inner_new();
}

impl IrregularVerbs {
    pub fn new() -> Self {
        Self { verbs: vec![] }
    }

    pub fn from_json_file(json: &str) -> Result<Self, serde_json::Error> {
        // Deserialize into Vec<serde_json::Value> to handle mixed types
        let values: Vec<serde_json::Value> =
            serde_json::from_str(json).expect("Failed to parse irregular verbs JSON");

        let mut verbs = Vec::new();

        for value in values {
            match value {
                serde_json::Value::Array(arr) if arr.len() == 3 => {
                    // Handle array of 3 strings
                    if let (Some(lemma), Some(preterite), Some(past_participle)) =
                        (arr[0].as_str(), arr[1].as_str(), arr[2].as_str())
                    {
                        verbs.push((
                            lemma.to_string(),
                            preterite.to_string(),
                            past_participle.to_string(),
                        ));
                    }
                }
                // Strings are used for comments to guide contributors editing the file
                serde_json::Value::String(_) => {}
                _ => {}
            }
        }

        Ok(Self { verbs })
    }

    pub fn curated() -> Arc<Self> {
        (*VERBS).clone()
    }

    pub fn get_past_participle_for_preterite(&self, preterite: &str) -> Option<&str> {
        self.verbs
            .iter()
            .find(|(_, pt, _)| pt.eq_ignore_ascii_case(preterite))
            .map(|(_, _, pp)| pp.as_str())
    }
}

impl Default for IrregularVerbs {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn can_find_irregular_past_participle_for_preterite_lowercase() {
        assert_eq!(
            IrregularVerbs::curated().get_past_participle_for_preterite("arose"),
            Some("arisen")
        );
    }

    #[test]
    fn can_find_irregular_past_participle_for_preterite_uppercase() {
        assert_eq!(
            IrregularVerbs::curated().get_past_participle_for_preterite("WENT"),
            Some("gone")
        );
    }

    #[test]
    fn can_find_irregular_past_participle_same_as_past_tense() {
        assert_eq!(
            IrregularVerbs::curated().get_past_participle_for_preterite("taught"),
            Some("taught")
        );
    }

    #[test]
    fn cant_find_regular_past_participle() {
        assert_eq!(
            IrregularVerbs::curated().get_past_participle_for_preterite("walked"),
            None
        );
    }

    #[test]
    fn cant_find_non_verb() {
        assert_eq!(
            IrregularVerbs::curated().get_past_participle_for_preterite("the"),
            None
        );
    }
}
@ -19,6 +19,11 @@ pub fn lex_hostname_token(source: &[char]) -> Option<FoundToken> {
        return None;
    }

    // For the sake of semantics and downstream grammar checking.
    if !ends_with_common_tld(&source[0..len]) {
        return None;
    }

    Some(FoundToken {
        next_index: len,
        token: TokenKind::Hostname,
@ -31,14 +36,14 @@ pub fn lex_hostname(source: &[char]) -> Option<usize> {
    // The beginning has different requirements from the rest of the hostname.
    let first = source.first()?;

    if !matches!(first, 'A'..='Z' | 'a'..='z' | '0'..='9' ) {
        return None;
    }

    for label in source.split(|c| *c == '.') {
        for c in label {
            passed_chars += 1;
            if !matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '-') {
                return Some(passed_chars - 1);
            }
        }
@ -53,6 +58,34 @@ pub fn lex_hostname(source: &[char]) -> Option<usize> {
    }
}

const COMMON_TLDS: &[&[char]] = &[
    &['c', 'o', 'm'],
    &['n', 'e', 't'],
    &['o', 'r', 'g'],
    &['e', 'd', 'u'],
    &['g', 'o', 'v'],
    &['m', 'i', 'l'],
    &['t', 'x', 't'],
    &['i', 'o'],
    &['c', 'o'],
    &['u', 's'],
    &['u', 'k'],
    &['d', 'e'],
    &['c', 'a'],
    &['a', 'u'],
    &['j', 'p'],
];

fn ends_with_common_tld(input: &[char]) -> bool {
    for tld in COMMON_TLDS {
        let n = tld.len();
        if input.len() >= n && &input[input.len() - n..] == *tld {
            return true;
        }
    }
    false
}

#[cfg(test)]
pub mod tests {
    use super::lex_hostname;
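Assuming `lex_hostname_token` and `COMMON_TLDS` above are in scope, a sketch of the new behavior. One subtlety worth noting: `ends_with_common_tld` is a plain character-suffix match, so it does not itself require a dot before the TLD; the earlier hostname-shape rules still have to accept the token first:

```rust
fn main() {
    let accepted: Vec<char> = "example.org".chars().collect();
    let rejected: Vec<char> = "example.dev".chars().collect();

    // `.org` is in COMMON_TLDS, so this still lexes as a hostname.
    assert!(lex_hostname_token(&accepted).is_some());
    // `.dev` is not in the table, so this no longer lexes as a hostname.
    assert!(lex_hostname_token(&rejected).is_none());
}
```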
@ -426,7 +426,7 @@ mod tests {

    #[test]
    fn lexes_youtube_as_hostname() {
        let source: Vec<_> = "YouTube.com".chars().collect();
        let source: Vec<_> = "youtube.com".chars().collect();
        assert_eq!(
            lex_token(&source),
            Some(FoundToken {
@ -4,13 +4,15 @@
mod char_ext;
mod char_string;
mod currency;
pub mod dict_word_metadata;
pub mod dict_word_metadata_orthography;
mod dict_word_metadata;
mod dict_word_metadata_orthography;
mod document;
mod edit_distance;
pub mod expr;
mod fat_token;
mod ignored_lints;
mod irregular_nouns;
mod irregular_verbs;
pub mod language_detection;
mod lexing;
pub mod linting;
@ -35,13 +37,15 @@ use std::collections::{BTreeMap, VecDeque};
pub use char_string::{CharString, CharStringExt};
pub use currency::Currency;
pub use dict_word_metadata::{
    AdverbData, ConjunctionData, Degree, DeterminerData, Dialect, DictWordMetadata, NounData,
    PronounData, VerbData, VerbForm,
    AdverbData, ConjunctionData, Degree, DeterminerData, Dialect, DialectFlags, DictWordMetadata,
    NounData, PronounData, VerbData, VerbForm, VerbFormFlags,
};
pub use dict_word_metadata_orthography::{OrthFlags, Orthography};
pub use document::Document;
pub use fat_token::{FatStringToken, FatToken};
pub use ignored_lints::{IgnoredLints, LintContext};
pub use irregular_nouns::IrregularNouns;
pub use irregular_verbs::IrregularVerbs;
use linting::Lint;
pub use mask::{Mask, Masker};
pub use number::{Number, OrdinalSuffix};
@ -54,7 +58,7 @@ pub use token_kind::TokenKind;
pub use token_string_ext::TokenStringExt;
pub use vec_ext::VecExt;

/// Return harper-core version
/// Return `harper-core` version
pub fn core_version() -> &'static str {
    env!("CARGO_PKG_VERSION")
}
@ -1,6 +1,7 @@
use crate::expr::Expr;
use crate::expr::FirstMatchOf;
use crate::expr::FixedPhrase;
use crate::linting::expr_linter::Chunk;
use crate::{
    Token, TokenStringExt,
    linting::{ExprLinter, Lint, LintKind, Suggestion},

@ -26,6 +27,8 @@ impl Default for APart {
}

impl ExprLinter for APart {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
148
harper-core/src/linting/a_while.rs
Normal file
@ -0,0 +1,148 @@
use std::sync::Arc;

use harper_brill::UPOS;

use crate::char_string::char_string;
use crate::expr::{Expr, ExprMap, SequenceExpr};
use crate::patterns::UPOSSet;
use crate::{CharString, Token, TokenStringExt};

use super::expr_linter::Chunk;
use super::{ExprLinter, Lint, LintKind, Suggestion};

pub struct AWhile {
    expr: Box<dyn Expr>,
    map: Arc<ExprMap<(CharString, &'static str)>>,
}

impl Default for AWhile {
    fn default() -> Self {
        let mut map = ExprMap::default();

        let a = SequenceExpr::default()
            .then(UPOSSet::new(&[UPOS::VERB]))
            .t_ws()
            .t_aco("a")
            .t_ws()
            .t_aco("while");

        map.insert(
            a,
            (
                char_string!("awhile"),
                "Use the single word `awhile` when it follows a verb.",
            ),
        );

        let b = SequenceExpr::default()
            .then_unless(UPOSSet::new(&[UPOS::VERB]))
            .t_ws()
            .t_aco("awhile");

        map.insert(
            b,
            (
                char_string!("a while"),
                "When not used after a verb, spell this duration as `a while`.",
            ),
        );

        let map = Arc::new(map);

        Self {
            expr: Box::new(map.clone()),
            map,
        }
    }
}

impl ExprLinter for AWhile {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }

    fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
        let &(ref suggestion, message) = self.map.lookup(0, matched_tokens, source)?;
        let span = matched_tokens[2..].span()?;
        let suggestion =
            Suggestion::replace_with_match_case(suggestion.to_vec(), span.get_content(source));

        Some(Lint {
            span,
            lint_kind: LintKind::Typo,
            suggestions: vec![suggestion],
            message: message.to_owned(),
            ..Default::default()
        })
    }

    fn description(&self) -> &'static str {
        "Enforces `awhile` after verbs and `a while` everywhere else."
    }
}

#[cfg(test)]
mod tests {
    use crate::linting::tests::{assert_no_lints, assert_suggestion_result};

    use super::AWhile;

    #[test]
    fn allow_issue_2144() {
        assert_no_lints(
            "After thinking awhile, I decided to foo a bar.",
            AWhile::default(),
        );
        assert_no_lints(
            "After thinking for a while, I decided to foo a bar.",
            AWhile::default(),
        );
    }

    #[test]
    fn fix_issue_2144() {
        assert_suggestion_result(
            "After thinking a while, I decided to foo a bar.",
            AWhile::default(),
            "After thinking awhile, I decided to foo a bar.",
        );
    }

    #[test]
    fn correct_in_quite_a_while() {
        assert_suggestion_result(
            "I haven't seen him in quite awhile.",
            AWhile::default(),
            "I haven't seen him in quite a while.",
        );
    }

    #[test]
    fn correct_in_a_while() {
        assert_suggestion_result(
            "I haven't checked in awhile.",
            AWhile::default(),
            "I haven't checked in a while.",
        );
    }

    #[test]
    fn correct_for_awhile() {
        assert_suggestion_result(
            "Video Element Error: MEDA_ERR_DECODE when chrome is left open for awhile",
            AWhile::default(),
            "Video Element Error: MEDA_ERR_DECODE when chrome is left open for a while",
        );
    }

    #[test]
    fn correct_after_awhile() {
        assert_suggestion_result(
            "Links on portal stop working after awhile, requiring page refresh.",
            AWhile::default(),
            "Links on portal stop working after a while, requiring page refresh.",
        );
    }
}
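A standalone sketch of the `ExprMap` dispatch idea this linter uses: each rule carries its own replacement and message, so a single lookup at match time selects the right correction. Simplified to string rules for illustration; none of these names are Harper's:

```rust
struct Rule {
    after_verb: bool,          // does the span follow a verb?
    found: &'static str,       // text as written
    replacement: &'static str, // suggested text
    message: &'static str,
}

fn lookup<'a>(rules: &'a [Rule], after_verb: bool, found: &str) -> Option<&'a Rule> {
    rules
        .iter()
        .find(|r| r.after_verb == after_verb && r.found == found)
}

fn main() {
    let rules = [
        Rule {
            after_verb: true,
            found: "a while",
            replacement: "awhile",
            message: "Use the single word `awhile` when it follows a verb.",
        },
        Rule {
            after_verb: false,
            found: "awhile",
            replacement: "a while",
            message: "When not used after a verb, spell this duration as `a while`.",
        },
    ];

    // "checked in awhile" does not follow a verb directly, so rule two fires.
    let hit = lookup(&rules, false, "awhile").unwrap();
    assert_eq!(hit.replacement, "a while");
}
```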
@ -1,3 +1,4 @@
use crate::linting::expr_linter::Chunk;
use crate::{
    Token,
    expr::{All, AnchorEnd, Expr, FirstMatchOf, LongestMatchOf, ReflexivePronoun, SequenceExpr},

@ -34,6 +35,8 @@ impl Default for Addicting {
}

impl ExprLinter for Addicting {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@ -1,3 +1,4 @@
use crate::linting::expr_linter::Chunk;
use crate::{
    CharStringExt, Token, TokenStringExt,
    expr::{Expr, SequenceExpr},

@ -11,16 +12,20 @@ pub struct AdjectiveDoubleDegree {
impl Default for AdjectiveDoubleDegree {
    fn default() -> Self {
        Self {
            expr: Box::new(SequenceExpr::word_set(&["more", "most"]).t_ws().then(
                |tok: &Token, _src: &[char]| {
                    tok.kind.is_comparative_adjective() || tok.kind.is_superlative_adjective()
                },
            )),
            expr: Box::new(
                SequenceExpr::word_set(&["more", "most"])
                    .t_ws()
                    .then_kind_where(|kind| {
                        kind.is_comparative_adjective() || kind.is_superlative_adjective()
                    }),
            ),
        }
    }
}

impl ExprLinter for AdjectiveDoubleDegree {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@ -1,5 +1,6 @@
use crate::Token;
use crate::expr::{DurationExpr, Expr, SequenceExpr};
use crate::linting::expr_linter::Chunk;
use crate::linting::{ExprLinter, Lint, LintKind, Suggestion};
use crate::token_string_ext::TokenStringExt;

@ -41,6 +42,8 @@ impl Default for AfterLater {
}

impl ExprLinter for AfterLater {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }

@ -70,7 +73,8 @@ impl ExprLinter for AfterLater {

#[cfg(test)]
mod tests {
    use crate::linting::{AfterLater, tests::assert_top3_suggestion_result};
    use super::AfterLater;
    use crate::linting::tests::assert_top3_suggestion_result;

    #[test]
    fn after_90_days_later() {
@ -1,6 +1,7 @@
use crate::Token;
use crate::char_string::CharStringExt;
use crate::expr::{Expr, SequenceExpr};
use crate::linting::expr_linter::Chunk;
use crate::linting::{ExprLinter, Lint, LintKind, Suggestion};
use crate::token_string_ext::TokenStringExt;

@ -41,6 +42,8 @@ impl Default for AllIntentsAndPurposes {
}

impl ExprLinter for AllIntentsAndPurposes {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
120
harper-core/src/linting/allow_to.rs
Normal file
@ -0,0 +1,120 @@
use crate::expr::{Expr, SequenceExpr};
use crate::linting::expr_linter::Chunk;
use crate::linting::{ExprLinter, Lint, LintKind};
use crate::token::Token;
use crate::token_string_ext::TokenStringExt;

pub struct AllowTo {
    exp: Box<dyn Expr>,
}

impl Default for AllowTo {
    fn default() -> Self {
        Self {
            // Note: Does not include "allowed to", which is a legitimate usage in its own right.
            exp: Box::new(
                SequenceExpr::word_set(&["allow", "allowing", "allows"])
                    .t_ws()
                    .t_aco("to")
                    .then_optional(SequenceExpr::default().t_ws().then_adverb())
                    .t_ws()
                    .then_any_word(),
            ),
        }
    }
}

impl ExprLinter for AllowTo {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.exp.as_ref()
    }

    fn match_to_lint(&self, toks: &[Token], _src: &[char]) -> Option<Lint> {
        let span = toks.span()?;
        let first = toks.first()?;
        let allow = first.span.get_content_string(_src);

        let message = format!(
            "For correct usage, either add a subject between `{allow}` and `to` (e.g., `{allow} someone to do`) or use the present participle (e.g., `{allow} doing`)."
        );

        Some(Lint {
            span,
            lint_kind: LintKind::Grammar,
            suggestions: vec![],
            message,
            ..Default::default()
        })
    }

    fn description(&self) -> &'static str {
        "Flags erroneous usage of `allow to` without a subject."
    }
}

#[cfg(test)]
mod tests {
    use super::AllowTo;
    use crate::linting::tests::{assert_lint_count, assert_no_lints};

    #[test]
    fn flag_allow_to() {
        assert_lint_count(
            "Allow to change approval policy during running task # 4394.",
            AllowTo::default(),
            1,
        );
    }

    #[test]
    fn flag_allowing_to() {
        assert_lint_count(
            "Allowing to have multiple views with different filtering # 952.",
            AllowTo::default(),
            1,
        );
    }

    #[test]
    fn flag_allows_to() {
        assert_lint_count(
            "It is easily doable for classic IHostBuilder, because its extension allows to pass configure action",
            AllowTo::default(),
            1,
        );
    }

    #[test]
    fn dont_flag_allowed_to() {
        assert_no_lints(
            "In C and C++ aliasing has to do with what expression types we are allowed to access stored values through.",
            AllowTo::default(),
        );
    }

    #[test]
    fn dont_flag_allow_pronoun_to() {
        assert_no_lints(
            "It would be really great to allow me to enter body data using multipart form",
            AllowTo::default(),
        );
    }

    #[test]
    fn dont_flag_allow_noun_to() {
        assert_no_lints(
            "Allows users to export SMART statistics from any connected hard drive",
            AllowTo::default(),
        );
    }

    #[test]
    fn dont_flag_allow_np_to() {
        assert_no_lints(
            "This vulnerability allows an authenticated attacker to infer data from the database by measuring response times",
            AllowTo::default(),
        );
    }
}
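An illustrative-only sketch of the shape `AllowTo` matches: `allow`, `allows`, or `allowing`, then `to`, an optional adverb, then any word. `allowed` is deliberately absent because `allowed to` is legitimate English. This is a toy model over word lists, not Harper's matcher:

```rust
// Accept `allow to <word>` and `allow to <adverb> <word>`, reject `allowed to`.
fn matches_allow_to(words: &[&str]) -> bool {
    match words {
        [first, "to", rest @ ..] if ["allow", "allows", "allowing"].contains(first) => {
            matches!(rest.len(), 1 | 2)
        }
        _ => false,
    }
}

fn main() {
    assert!(matches_allow_to(&["allows", "to", "pass"]));
    assert!(matches_allow_to(&["allow", "to", "quickly", "pass"]));
    assert!(!matches_allow_to(&["allowed", "to", "access"]));
}
```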
@ -1,3 +1,4 @@
use crate::linting::expr_linter::Chunk;
use crate::{
    Span, Token, TokenStringExt,
    expr::{Expr, FixedPhrase, LongestMatchOf, SequenceExpr},

@ -40,6 +41,8 @@ impl Default for AmInTheMorning {
}

impl ExprLinter for AmInTheMorning {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@ -5,6 +5,7 @@ use crate::expr::SequenceExpr;
use crate::{Token, TokenStringExt, patterns::WordSet};

use super::{ExprLinter, Lint, LintKind, Suggestion};
use crate::linting::expr_linter::Chunk;

pub struct AmountsFor {
    expr: Box<dyn Expr>,

@ -38,6 +39,8 @@ impl Default for AmountsFor {
}

impl ExprLinter for AmountsFor {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@ -109,6 +109,10 @@ fn starts_with_vowel(word: &[char]) -> bool {
    let word = to_lower_word(word);
    let word = word.as_ref();

    if matches!(word, ['e', 'u', 'l', 'e', ..]) {
        return true;
    }

    if matches!(
        word,
        [] | ['u', 'k', ..]

@ -224,6 +228,12 @@ mod tests {
        assert_lint_count("Here is a LLM-based system.", AnA, 1);
    }

    #[test]
    fn detects_euler_as_vowel() {
        assert_lint_count("This is an Euler brick.", AnA, 0);
        assert_lint_count("The graph has an Eulerian tour.", AnA, 0);
    }

    #[test]
    fn capitalized_fourier() {
        assert_lint_count("Then, perform a Fourier transform.", AnA, 0);
93
harper-core/src/linting/and_in.rs
Normal file
@ -0,0 +1,93 @@
use crate::Token;
use crate::expr::{Expr, SequenceExpr};
use crate::linting::expr_linter::Chunk;
use crate::linting::{ExprLinter, Lint, LintKind, Suggestion};

pub struct AndIn {
    expr: Box<dyn Expr>,
}

impl Default for AndIn {
    fn default() -> Self {
        Self {
            expr: Box::new(SequenceExpr::fixed_phrase("an in").then_optional_hyphen()),
        }
    }
}

impl ExprLinter for AndIn {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        &*self.expr
    }

    fn match_to_lint(&self, toks: &[Token], src: &[char]) -> Option<Lint> {
        if toks.len() != 3 {
            return None;
        }

        Some(Lint {
            span: toks[0].span,
            lint_kind: LintKind::Typo,
            message: "Did you mean `and in`?".to_string(),
            suggestions: vec![Suggestion::replace_with_match_case(
                ['a', 'n', 'd'].to_vec(),
                toks[2].span.get_content(src),
            )],
            ..Default::default()
        })
    }

    fn description(&self) -> &str {
        "Fixes the incorrect phrase `an in` to `and in` for proper conjunction usage."
    }
}

#[cfg(test)]
mod tests {
    use super::AndIn;
    use crate::linting::tests::{assert_no_lints, assert_suggestion_result};

    #[test]
    fn dont_flag_an_in_house() {
        assert_no_lints(
            "for several years as an in-house engine, used to ...",
            AndIn::default(),
        );
    }

    #[test]
    fn dont_flag_an_in_memory() {
        assert_no_lints(
            "including an in-memory real-time Vector Index,",
            AndIn::default(),
        );
    }

    #[test]
    fn dont_flag_an_in_the_moment() {
        assert_no_lints(
            "His words serve as an in-the-moment explanation for what had happened.",
            AndIn::default(),
        );
    }

    #[test]
    fn fix_an_in_to_and_in() {
        assert_suggestion_result(
            "This is an expensive operation, so try to only do it at startup an in tests.",
            AndIn::default(),
            "This is an expensive operation, so try to only do it at startup and in tests.",
        );
    }

    #[test]
    #[ignore = "This is a known false positive - `an in` can be valid in some contexts"]
    fn dont_flag_an_in_with_company() {
        assert_no_lints(
            "His parents got him an in with the company.",
            AndIn::default(),
        );
    }
}
165
harper-core/src/linting/and_the_like.rs
Normal file
@ -0,0 +1,165 @@
|
|||
use crate::expr::{All, Expr, FixedPhrase, SequenceExpr};
use crate::linting::expr_linter::Chunk;
use crate::linting::{ExprLinter, LintKind, Suggestion};
use crate::patterns::WordSet;
use crate::token_string_ext::TokenStringExt;
use crate::{Lint, Token};

pub struct AndTheLike {
    expr: Box<dyn Expr>,
}

impl Default for AndTheLike {
    fn default() -> Self {
        Self {
            expr: Box::new(All::new(vec![
                Box::new(
                    // All known variants seen in the wild, good and bad
                    SequenceExpr::word_set(&["and", "or", "an"])
                        .t_ws()
                        .then_optional(SequenceExpr::aco("the").t_ws())
                        .then_word_set(&["alike", "alikes", "like", "likes"]),
                ),
                Box::new(SequenceExpr::unless(
                    SequenceExpr::word_set(&["and", "or"])
                        .t_ws()
                        .then_any_of(vec![
                            // But not the correct variants
                            Box::new(FixedPhrase::from_phrase("the like")),
                            // And not the phrases that were coincidentally caught in the net
                            Box::new(WordSet::new(&["like", "likes"])),
                        ]),
                )),
            ])),
        }
    }
}

impl ExprLinter for AndTheLike {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }

    fn match_to_lint(&self, toks: &[Token], src: &[char]) -> Option<Lint> {
        let (conj, ws) = (&toks[0], &toks[1]);

        let conj = if conj.span.get_content(src)[0] == 'a' {
            "and"
        } else {
            "or"
        };

        let corrected = format!("{}{}the like", conj, ws.span.get_content_string(src));

        Some(Lint {
            span: toks.span()?,
            lint_kind: LintKind::Usage,
            suggestions: vec![Suggestion::replace_with_match_case(
                corrected.chars().collect(),
                toks.span()?.get_content(src),
            )],
            message: "If you intended the idiom meaning `similar things`, the correct form uses `the like`.".to_string(),
            ..Default::default()
        })
    }

    fn description(&self) -> &str {
        "Corrects mistakes in `and the like` and `or the like`."
    }
}

#[cfg(test)]
mod tests {
    use super::AndTheLike;
    use crate::linting::tests::{assert_no_lints, assert_suggestion_result};

    #[test]
    fn dont_flag_and_the_like() {
        assert_no_lints(
            "The color of brackets and the like appears to be incorrect ...",
            AndTheLike::default(),
        );
    }

    #[test]
    fn dont_flag_or_the_like() {
        assert_no_lints(
            "Does WCAG apply only to English (or the like), or does it aim to cover all languages?",
            AndTheLike::default(),
        );
    }

    #[test]
    fn flag_an_the_likes() {
        assert_suggestion_result(
            "Allow jsSourceDir (an the likes) to refer to the project root. #5",
            AndTheLike::default(),
            "Allow jsSourceDir (and the like) to refer to the project root. #5",
        );
    }

    #[test]
    fn flag_and_alike() {
        assert_suggestion_result(
            "Latest release breaks FilePicker and alike",
            AndTheLike::default(),
            "Latest release breaks FilePicker and the like",
        );
    }

    #[test]
    fn flag_and_alikes() {
        assert_suggestion_result(
            "Compiled functions (and alikes) need to keep references for their module objects",
            AndTheLike::default(),
            "Compiled functions (and the like) need to keep references for their module objects",
        );
    }

    #[test]
    fn flag_and_the_alike() {
        assert_suggestion_result(
            "Suggestions, comments and the alike are welcome on http://waa.ai/4xtC",
            AndTheLike::default(),
            "Suggestions, comments and the like are welcome on http://waa.ai/4xtC",
        );
    }

    #[test]
    fn flag_and_the_likes() {
        assert_suggestion_result(
            "Don't report \"expected semicolon or line break\", \"expected comma\" and the likes at every token boundary",
            AndTheLike::default(),
            "Don't report \"expected semicolon or line break\", \"expected comma\" and the like at every token boundary",
        );
    }

    #[test]
    fn flag_or_alike() {
        assert_suggestion_result(
            "enable biome extension to \"monitor or alike\" the workspace.",
            AndTheLike::default(),
            "enable biome extension to \"monitor or the like\" the workspace.",
        );
    }

    #[test]
    fn flag_or_alikes() {
        assert_suggestion_result(
            "Persistent Compiler Caching with ccache or alikes",
            AndTheLike::default(),
            "Persistent Compiler Caching with ccache or the like",
        );
    }

    #[test]
    fn flag_or_the_likes() {
        assert_suggestion_result(
            "Description of the problem: Implement aria2c or the likes to resume partial downloads.",
            AndTheLike::default(),
            "Description of the problem: Implement aria2c or the like to resume partial downloads.",
        );
    }
}
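Because `match_to_lint` splices the matched conjunction and its original whitespace back into the replacement, a newline between the two words survives the fix. A sketch of one more case exercising that path, assuming the same test helpers (hypothetical, not part of this change):

    #[test]
    fn preserves_newline_between_conjunction_and_idiom() {
        // Hypothetical case: the captured `\n` whitespace is reused verbatim,
        // so only `alikes` collapses to `the like`.
        assert_suggestion_result(
            "Compilers, interpreters and\nalikes are welcome.",
            AndTheLike::default(),
            "Compilers, interpreters and\nthe like are welcome.",
        );
    }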
@@ -1,3 +1,4 @@
use crate::linting::expr_linter::Chunk;
use crate::{
    Token, TokenStringExt,
    expr::{Expr, FixedPhrase, SequenceExpr},

@@ -23,6 +24,8 @@ impl Default for AnotherThingComing {
}

impl ExprLinter for AnotherThingComing {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -1,3 +1,4 @@
use crate::linting::expr_linter::Chunk;
use crate::{
    Token, TokenStringExt,
    expr::{Expr, FixedPhrase, SequenceExpr},

@@ -23,6 +24,8 @@ impl Default for AnotherThinkComing {
}

impl ExprLinter for AnotherThinkComing {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
144 harper-core/src/linting/apart_from.rs Normal file

@@ -0,0 +1,144 @@
use crate::Token;
use crate::expr::{Expr, SequenceExpr};
use crate::linting::expr_linter::Chunk;

use super::{ExprLinter, Lint, LintKind, Suggestion};

pub struct ApartFrom {
    expr: Box<dyn Expr>,
}

impl Default for ApartFrom {
    fn default() -> Self {
        let expr = SequenceExpr::any_capitalization_of("apart")
            .t_ws()
            .then_any_capitalization_of("form");

        Self {
            expr: Box::new(expr),
        }
    }
}

impl ExprLinter for ApartFrom {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }

    fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
        let span = matched_tokens.last()?.span;

        Some(Lint {
            span,
            lint_kind: LintKind::WordChoice,
            suggestions: vec![Suggestion::replace_with_match_case_str(
                "from",
                span.get_content(source),
            )],
            message: "Use `from` to spell `apart from`.".to_owned(),
            priority: 50,
        })
    }

    fn description(&self) -> &'static str {
        "Flags the misspelling `apart form` and suggests `apart from`."
    }
}

#[cfg(test)]
mod tests {
    use super::ApartFrom;
    use crate::linting::tests::{assert_lint_count, assert_suggestion_result};

    #[test]
    fn corrects_basic_typo() {
        assert_suggestion_result(
            "Christianity was set apart form other religions.",
            ApartFrom::default(),
            "Christianity was set apart from other religions.",
        );
    }

    #[test]
    fn corrects_title_case() {
        assert_suggestion_result(
            "Apart Form these files, everything uploaded fine.",
            ApartFrom::default(),
            "Apart From these files, everything uploaded fine.",
        );
    }

    #[test]
    fn corrects_all_caps() {
        assert_suggestion_result(
            "APART FORM THE REST OF THE FIELD.",
            ApartFrom::default(),
            "APART FROM THE REST OF THE FIELD.",
        );
    }

    #[test]
    fn corrects_with_comma() {
        assert_suggestion_result(
            "It was apart form, not apart from, the original plan.",
            ApartFrom::default(),
            "It was apart from, not apart from, the original plan.",
        );
    }

    #[test]
    fn corrects_with_newline() {
        assert_suggestion_result(
            "They stood apart\nform everyone else at the rally.",
            ApartFrom::default(),
            "They stood apart\nfrom everyone else at the rally.",
        );
    }

    #[test]
    fn corrects_extra_spacing() {
        assert_suggestion_result(
            "We keep the archive apart form public assets.",
            ApartFrom::default(),
            "We keep the archive apart from public assets.",
        );
    }

    #[test]
    fn allows_correct_phrase() {
        assert_lint_count(
            "Lebanon's freedoms set it apart from other Arab states.",
            ApartFrom::default(),
            0,
        );
    }

    #[test]
    fn ignores_hyphenated() {
        assert_lint_count(
            "Their apart-form design wasn’t what we needed.",
            ApartFrom::default(),
            0,
        );
    }

    #[test]
    fn ignores_split_by_comma() {
        assert_lint_count(
            "They stood apart, form lines when asked.",
            ApartFrom::default(),
            0,
        );
    }

    #[test]
    fn ignores_unrelated_form_usage() {
        assert_lint_count(
            "The form was kept apart to dry after printing.",
            ApartFrom::default(),
            0,
        );
    }
}
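The Title Case and ALL CAPS tests pass because the suggestion is built with `replace_with_match_case_str`, which re-cases the replacement to mirror the matched text. A self-contained sketch of the idea in isolation (hypothetical helper, not the crate's implementation, which lives in `Suggestion::replace_with_match_case_str`):

    // Hypothetical illustration of case matching.
    fn match_case(replacement: &str, template: &str) -> String {
        let has_alpha = template.chars().any(|c| c.is_alphabetic());
        let all_caps =
            has_alpha && template.chars().all(|c| !c.is_alphabetic() || c.is_uppercase());
        let title_case = template.chars().next().is_some_and(|c| c.is_uppercase());

        if all_caps {
            replacement.to_uppercase()
        } else if title_case {
            let mut chars: Vec<char> = replacement.chars().collect();
            if let Some(first) = chars.first_mut() {
                *first = first.to_ascii_uppercase();
            }
            chars.into_iter().collect()
        } else {
            replacement.to_string()
        }
    }

    fn main() {
        assert_eq!(match_case("from", "FORM"), "FROM");
        assert_eq!(match_case("from", "Form"), "From");
        assert_eq!(match_case("from", "form"), "from");
    }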
@@ -1,5 +1,6 @@
use crate::expr::Expr;
use crate::expr::SequenceExpr;
use crate::linting::expr_linter::Chunk;
use crate::{
    Span, Token,
    linting::{ExprLinter, Lint, LintKind, Suggestion},

@@ -32,6 +33,8 @@ impl Default for AskNoPreposition {
}

impl ExprLinter for AskNoPreposition {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -3,6 +3,7 @@ use crate::expr::{Expr, SequenceExpr};
use crate::linting::{LintKind, Suggestion};

use super::{ExprLinter, Lint};
use crate::linting::expr_linter::Chunk;

pub struct AvoidCurses {
    expr: Box<dyn Expr>,

@@ -17,6 +18,8 @@ impl Default for AvoidCurses {
}

impl ExprLinter for AvoidCurses {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -8,6 +8,7 @@ use crate::{
};

use super::{ExprLinter, Lint, LintKind, Suggestion};
use crate::linting::expr_linter::Chunk;

pub struct BackInTheDay {
    expr: Box<dyn Expr>,

@@ -34,6 +35,8 @@ impl Default for BackInTheDay {
}

impl ExprLinter for BackInTheDay {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -1,5 +1,6 @@
use std::sync::Arc;

use crate::linting::expr_linter::Chunk;
use crate::{
    Token,
    expr::{Expr, ExprMap, SequenceExpr},

@@ -51,6 +52,8 @@ impl Default for BeAllowed {
}

impl ExprLinter for BeAllowed {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -3,6 +3,7 @@ use crate::expr::{Expr, SequenceExpr};
use crate::patterns::WhitespacePattern;

use super::{ExprLinter, Lint, LintKind, Suggestion};
use crate::linting::expr_linter::Chunk;

pub struct BestOfAllTime {
    expr: Box<dyn Expr>,

@@ -37,6 +38,8 @@ impl Default for BestOfAllTime {
}

impl ExprLinter for BestOfAllTime {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -2,6 +2,7 @@ use crate::expr::{Expr, WordExprGroup};
use crate::{Token, TokenStringExt};

use super::{ExprLinter, Lint, LintKind};
use crate::linting::expr_linter::Chunk;

pub struct BoringWords {
    expr: Box<dyn Expr>,

@@ -24,6 +25,8 @@ impl Default for BoringWords {
}

impl ExprLinter for BoringWords {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -2,6 +2,7 @@ use super::{ExprLinter, Lint, LintKind};
use crate::Token;
use crate::expr::{Expr, SequenceExpr};
use crate::linting::Suggestion;
use crate::linting::expr_linter::Chunk;

pub struct Bought {
    expr: Box<dyn Expr>,

@@ -24,6 +25,8 @@ impl Default for Bought {
}

impl ExprLinter for Bought {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
202 harper-core/src/linting/brand_brandish.rs Normal file

@@ -0,0 +1,202 @@
use crate::{
    Lint, Token, TokenKind,
    expr::{Expr, SequenceExpr},
    linting::{ExprLinter, LintKind, Suggestion, expr_linter::Chunk},
};

pub struct BrandBrandish {
    expr: Box<dyn Expr>,
}

impl Default for BrandBrandish {
    fn default() -> Self {
        Self {
            expr: Box::new(
                SequenceExpr::word_set(&["brandish", "brandished", "brandishes", "brandishing"])
                    .t_ws()
                    // "her" is also a possessive determiner as in "she brandished her sword"
                    // "it" and "them" can refer to objects as in "draw your sword(s) and brandish it/them"
                    .then_kind_except(TokenKind::is_object_pronoun, &["her", "it", "them"]),
            ),
        }
    }
}

impl ExprLinter for BrandBrandish {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }

    fn match_to_lint(&self, toks: &[Token], src: &[char]) -> Option<Lint> {
        let verb_span = toks.first()?.span;
        let verb_chars = verb_span.get_content(src);

        enum Form {
            Base,
            Past,
            ThirdPerson,
            Ing,
        }

        let infl = match verb_chars.last().map(|c| c.to_ascii_lowercase()) {
            Some('h') => Form::Base,
            Some('d') => Form::Past,
            Some('s') => Form::ThirdPerson,
            Some('g') => Form::Ing,
            _ => return None,
        };

        Some(Lint {
            span: verb_span,
            lint_kind: LintKind::Malapropism,
            suggestions: vec![Suggestion::replace_with_match_case_str(
                match infl {
                    Form::Base => "brand",
                    Form::Past => "branded",
                    Form::ThirdPerson => "brands",
                    Form::Ing => "branding",
                },
                verb_chars,
            )],
            message: "`Brandish` means to wield a weapon. You probably mean `brand`.".to_string(),
            ..Default::default()
        })
    }

    fn description(&self) -> &str {
        "Looks for `brandish` wrongly used when `brand` is intended."
    }
}

#[cfg(test)]
mod tests {
    use crate::linting::{brand_brandish::BrandBrandish, tests::assert_suggestion_result};

    #[test]
    fn correct_brandish_a_traitor() {
        assert_suggestion_result(
            "Unretire Gretzky's sweater . Brandish him a traitor.",
            BrandBrandish::default(),
            "Unretire Gretzky's sweater . Brand him a traitor.",
        );
    }

    #[test]
    fn correct_brandish_a_criminal() {
        assert_suggestion_result(
            "lied to stop kuma's ideology from taking root and to brandish him a criminal that they could arrest",
            BrandBrandish::default(),
            "lied to stop kuma's ideology from taking root and to brand him a criminal that they could arrest",
        );
    }

    #[test]
    fn correct_brandish_as_a() {
        assert_suggestion_result(
            "he was so afraid his thoughts could brandish him as a paedophile",
            BrandBrandish::default(),
            "he was so afraid his thoughts could brand him as a paedophile",
        );
    }

    #[test]
    fn correct_brandish_an_offender() {
        assert_suggestion_result(
            "Chanel Oberlin's reason for purposely leading on Pete Martinez in order to humiliate him and brandish him a registered sex offender",
            BrandBrandish::default(),
            "Chanel Oberlin's reason for purposely leading on Pete Martinez in order to humiliate him and brand him a registered sex offender",
        );
    }

    #[test]
    fn correct_brandish_with_nicknames() {
        assert_suggestion_result(
            "?? spoke out over the move by Kenyans to continuously brandish him with nicknames even after ...",
            BrandBrandish::default(),
            "?? spoke out over the move by Kenyans to continuously brand him with nicknames even after ...",
        );
    }

    #[test]
    fn correct_brandish_as_a_symbol() {
        assert_suggestion_result(
            "brandish him as an acclaimed symbol of humility, integrity and incorruptibility in the face of today's corrupt economic and political elite1",
            BrandBrandish::default(),
            "brand him as an acclaimed symbol of humility, integrity and incorruptibility in the face of today's corrupt economic and political elite1",
        );
    }

    #[test]
    fn correct_brandish_as_illegal() {
        assert_suggestion_result(
            "To attempt to brandish him as an “illegal immigrant” is absolutely ridiculous and warrants an immediate retraction and apology.",
            BrandBrandish::default(),
            "To attempt to brand him as an “illegal immigrant” is absolutely ridiculous and warrants an immediate retraction and apology.",
        );
    }

    #[test]
    fn correct_brandish_with_nickname() {
        assert_suggestion_result(
            "The small minded townsfolk brandish him with the nickname \"Genepool\" due to his physical and cognitive shortcomings.",
            BrandBrandish::default(),
            "The small minded townsfolk brand him with the nickname \"Genepool\" due to his physical and cognitive shortcomings.",
        );
    }

    #[test]
    fn correct_brandish_with_label() {
        assert_suggestion_result(
            "One such reason that critics brandish him with this label is due to Peterson's opposition to Canada's Bill C-16",
            BrandBrandish::default(),
            "One such reason that critics brand him with this label is due to Peterson's opposition to Canada's Bill C-16",
        );
    }

    #[test]
    fn correct_brandished_us() {
        assert_suggestion_result(
            "The mark they brandished us with will fade to dust when we finally meet our end.",
            BrandBrandish::default(),
            "The mark they branded us with will fade to dust when we finally meet our end.",
        )
    }

    #[test]
    fn correct_brandishing_him() {
        assert_suggestion_result(
            "he said some words trying to hit back at the center for brandishing him as a Pakistani at an NRC rally",
            BrandBrandish::default(),
            "he said some words trying to hit back at the center for branding him as a Pakistani at an NRC rally",
        )
    }

    #[test]
    fn correct_brandish_us() {
        assert_suggestion_result(
            "Our resolute determination for the ultimate quality and all-inclusive directory of food commodities brandish us as a flawless associate in B2B",
            BrandBrandish::default(),
            "Our resolute determination for the ultimate quality and all-inclusive directory of food commodities brand us as a flawless associate in B2B",
        )
    }

    #[test]
    fn correct_brandished_him() {
        assert_suggestion_result(
            "Frank discovers Myra brandished him with the letter 'R', for rapist.",
            BrandBrandish::default(),
            "Frank discovers Myra branded him with the letter 'R', for rapist.",
        )
    }

    #[test]
    fn correct_brandishes_him() {
        assert_suggestion_result(
            "Whether one turns a blind eye to Tim's wrongs or brandishes him a traitor will plant audiences in their own personal line in the sand.",
            BrandBrandish::default(),
            "Whether one turns a blind eye to Tim's wrongs or brands him a traitor will plant audiences in their own personal line in the sand.",
        )
    }
}
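Since all four inflections of `brandish` end in a distinct letter (h, d, s, g), the linter can pick the matching inflection of `brand` from the final character alone. A self-contained sketch of that dispatch (hypothetical helper name, mirroring the match above):

    // Hypothetical standalone version of the suffix dispatch used above.
    fn brand_inflection_for(verb: &str) -> Option<&'static str> {
        match verb.chars().last().map(|c| c.to_ascii_lowercase()) {
            Some('h') => Some("brand"),    // brandish
            Some('d') => Some("branded"),  // brandished
            Some('s') => Some("brands"),   // brandishes
            Some('g') => Some("branding"), // brandishing
            _ => None,
        }
    }

    fn main() {
        assert_eq!(brand_inflection_for("Brandished"), Some("branded"));
        assert_eq!(brand_inflection_for("brandishing"), Some("branding"));
    }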
338 harper-core/src/linting/call_them.rs Normal file

@@ -0,0 +1,338 @@
use std::{ops::Range, sync::Arc};

use crate::expr::{Expr, ExprMap, SequenceExpr};
use crate::patterns::{DerivedFrom, WordSet};
use crate::{Token, TokenStringExt};

use super::{ExprLinter, Lint, LintKind, Suggestion};
use crate::linting::expr_linter::Chunk;

pub struct CallThem {
    expr: Box<dyn Expr>,
    map: Arc<ExprMap<Range<usize>>>,
}

impl Default for CallThem {
    fn default() -> Self {
        let mut map = ExprMap::default();

        let post_exception = Arc::new(
            SequenceExpr::default()
                .t_ws()
                .then(WordSet::new(&["if", "it"])),
        );

        map.insert(
            SequenceExpr::default()
                .then(DerivedFrom::new_from_str("call"))
                .t_ws()
                .then_pronoun()
                .t_ws()
                .t_aco("as")
                .then_unless(post_exception.clone()),
            3..5,
        );

        map.insert(
            SequenceExpr::default()
                .then(DerivedFrom::new_from_str("call"))
                .t_ws()
                .t_aco("as")
                .t_ws()
                .then_pronoun()
                .then_unless(post_exception.clone()),
            1..3,
        );

        let map = Arc::new(map);

        Self {
            expr: Box::new(map.clone()),
            map,
        }
    }
}

impl ExprLinter for CallThem {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }

    fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
        let removal_range = self.map.lookup(0, matched_tokens, source)?.clone();
        let offending_tokens = matched_tokens.get(removal_range)?;

        Some(Lint {
            span: offending_tokens.span()?,
            lint_kind: LintKind::Redundancy,
            suggestions: vec![Suggestion::Remove],
            message: "`as` is redundant in this context.".to_owned(),
            ..Default::default()
        })
    }

    fn description(&self) -> &'static str {
        "Addresses non-idiomatic phrases like `call them as`."
    }
}

#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use crate::Document;
    use crate::linting::tests::{assert_no_lints, assert_suggestion_result};

    use super::CallThem;

    #[test]
    fn prefer_plug_and_receptacle() {
        assert_suggestion_result(
            r#"I prefer to call them as Plug (male) and Receptacle (female). Receptacles are seen in laptops, mobile phones etc.."#,
            CallThem::default(),
            r#"I prefer to call them Plug (male) and Receptacle (female). Receptacles are seen in laptops, mobile phones etc.."#,
        );
    }

    #[test]
    fn builtins_id() {
        assert_suggestion_result(
            r#"I’d categorically ignore *id* as a builtin, and when you do need it in a module, make it super explicit and `import builtins` and call it as `builtins.id`."#,
            CallThem::default(),
            r#"I’d categorically ignore *id* as a builtin, and when you do need it in a module, make it super explicit and `import builtins` and call it `builtins.id`."#,
        );
    }

    #[test]
    fn non_modal_dialogue() {
        assert_suggestion_result(
            r#"We usually call it as non-modal dialogue e.g. when hit Gmail compose button, a nonmodal dialogue opens."#,
            CallThem::default(),
            r#"We usually call it non-modal dialogue e.g. when hit Gmail compose button, a nonmodal dialogue opens."#,
        );
    }

    #[test]
    fn prefer_to_call_them() {
        assert_suggestion_result(
            r#"So, how do you typically prefer to call them as?"#,
            CallThem::default(),
            r#"So, how do you typically prefer to call them?"#,
        );
    }

    #[test]
    fn called_them_allies() {
        assert_suggestion_result(
            r#"Yes as tribes or nomads you called them as allies but you didn’t get their levies as your own."#,
            CallThem::default(),
            r#"Yes as tribes or nomads you called them allies but you didn’t get their levies as your own."#,
        );
    }

    #[test]
    fn character_development() {
        assert_suggestion_result(
            r#"I call this as character development."#,
            CallThem::default(),
            r#"I call this character development."#,
        );
    }

    #[test]
    fn fate_or_time() {
        assert_suggestion_result(
            r#"Should I Call It As Fate Or Time"#,
            CallThem::default(),
            r#"Should I Call It Fate Or Time"#,
        );
    }

    #[test]
    fn abstract_latte_art() {
        assert_suggestion_result(
            r#"Can we just call it as abstract latte art."#,
            CallThem::default(),
            r#"Can we just call it abstract latte art."#,
        );
    }

    #[test]
    fn sounding_boards() {
        assert_suggestion_result(
            r#"I call them as my ‘sounding boards’"#,
            CallThem::default(),
            r#"I call them my ‘sounding boards’"#,
        );
    }

    #[test]
    fn calling_them_disaster() {
        assert_suggestion_result(
            r#"I totally disagree with your point listed and calling them as disaster."#,
            CallThem::default(),
            r#"I totally disagree with your point listed and calling them disaster."#,
        );
    }

    #[test]
    fn battle_of_boxes() {
        assert_suggestion_result(
            r#"Windows Sandbox and VirtualBox or I would like to call this as “Battle of Boxes.”"#,
            CallThem::default(),
            r#"Windows Sandbox and VirtualBox or I would like to call this “Battle of Boxes.”"#,
        );
    }

    #[test]
    fn called_her_shinnasan() {
        assert_suggestion_result(
            r#"Nice meeting a follower from reddit I called her as Shinna-san, welcome again to Toram!!"#,
            CallThem::default(),
            r#"Nice meeting a follower from reddit I called her Shinna-san, welcome again to Toram!!"#,
        );
    }

    #[test]
    fn calling_it_otp() {
        assert_suggestion_result(
            r#"Calling it as OTP in this case misleading"#,
            CallThem::default(),
            r#"Calling it OTP in this case misleading"#,
        );
    }

    #[test]
    fn call_it_procrastination() {
        assert_suggestion_result(
            r#"To summarise it in just one word I would call it as procrastination."#,
            CallThem::default(),
            r#"To summarise it in just one word I would call it procrastination."#,
        );
    }

    #[test]
    fn call_her_important() {
        assert_suggestion_result(
            r#"Liked the article overall but to call her as important to rap as Jay or Dre is a bold overstatement."#,
            CallThem::default(),
            r#"Liked the article overall but to call her important to rap as Jay or Dre is a bold overstatement."#,
        );
    }

    #[test]
    fn call_him_kindles() {
        assert_suggestion_result(
            r#"The days when I had my first best friend, I would rather call him as human version of kindle audiobook, who keeps on talking about everything under the umbrella."#,
            CallThem::default(),
            r#"The days when I had my first best friend, I would rather call him human version of kindle audiobook, who keeps on talking about everything under the umbrella."#,
        );
    }

    #[test]
    fn call_them_defenders() {
        assert_suggestion_result(
            r#"Declaring war challenging land of a vassal should call them as defenders!"#,
            CallThem::default(),
            r#"Declaring war challenging land of a vassal should call them defenders!"#,
        );
    }

    #[test]
    fn call_it_magical() {
        assert_suggestion_result(
            r#"I would like to call it as magical."#,
            CallThem::default(),
            r#"I would like to call it magical."#,
        );
    }

    #[test]
    fn forward_lateral() {
        assert_suggestion_result(
            r#"Surprised the refs didn’t call this as a forward lateral."#,
            CallThem::default(),
            r#"Surprised the refs didn’t call this a forward lateral."#,
        );
    }

    #[test]
    fn calling_best_friend() {
        assert_suggestion_result(
            r#"Meet my buddy! I love calling him as my best friend, because he never failed to bring some cheer in me!"#,
            CallThem::default(),
            r#"Meet my buddy! I love calling him my best friend, because he never failed to bring some cheer in me!"#,
        );
    }

    #[test]
    fn calling_everyone_titles() {
        assert_suggestion_result(
            r#"Currently, I’m teaching in Asia and the students have the local custom of calling everyone as Mr. Givenname or Miss Givenname"#,
            CallThem::default(),
            r#"Currently, I’m teaching in Asia and the students have the local custom of calling everyone Mr. Givenname or Miss Givenname"#,
        );
    }

    #[test]
    fn called_as_he() {
        assert_suggestion_result(
            r#"I prefer to be called as he when referred in 3rd person and I’m sure that everyone would be ok to call me as he."#,
            CallThem::default(),
            r#"I prefer to be called he when referred in 3rd person and I’m sure that everyone would be ok to call me he."#,
        );
    }

    #[test]
    fn calls_him_bob() {
        assert_suggestion_result(
            r#"In Twelve Monkeys, Cole hears someone who calls him as “Bob”"#,
            CallThem::default(),
            r#"In Twelve Monkeys, Cole hears someone who calls him “Bob”"#,
        );
    }

    #[test]
    fn pliny_called_it() {
        assert_suggestion_result(
            r#"Pliny the Elder called it as lake of Gennesaret or Taricheae in his encyclopedia, Natural History."#,
            CallThem::default(),
            r#"Pliny the Elder called it lake of Gennesaret or Taricheae in his encyclopedia, Natural History."#,
        );
    }

    #[test]
    fn students_call_you() {
        assert_suggestion_result(
            r#"In the same way your students will call you as ~先生 even after they graduated/move to higher education."#,
            CallThem::default(),
            r#"In the same way your students will call you ~先生 even after they graduated/move to higher education."#,
        );
    }

    #[test]
    fn paradoxical_reaction() {
        assert_suggestion_result(
            r#"We can call it as Paradoxical Reaction which means a medicine which is used to reduce pain increases the pain when it is"#,
            CallThem::default(),
            r#"We can call it Paradoxical Reaction which means a medicine which is used to reduce pain increases the pain when it is"#,
        );
    }

    #[test]
    fn rust_module() {
        assert_no_lints(
            "I want to call them as if they were just another Rust module",
            CallThem::default(),
        );
    }

    #[test]
    fn want_to_do() {
        assert_no_lints(
            "however its a design choice to not call it as it does things I don't want to do.",
            CallThem::default(),
        );
    }
}
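The `ExprMap` payloads above are token ranges: the first pattern stores `3..5` (the whitespace plus `as`) so `Suggestion::Remove` drops exactly ` as` after the pronoun. A sketch of one more case exercising that range, assuming the same test helpers (hypothetical, not part of this change):

    #[test]
    fn call_him_a_legend() {
        // Hypothetical extra case: tokens 3..5 of the match (` as`) are removed,
        // and the `as if`/`as it` exception does not apply here.
        assert_suggestion_result(
            "People call him as a legend.",
            CallThem::default(),
            "People call him a legend.",
        );
    }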
@@ -2,6 +2,7 @@ use super::{ExprLinter, Suggestion};
use crate::Lint;
use crate::expr::{Expr, LongestMatchOf, SequenceExpr};
use crate::linting::LintKind;
use crate::linting::expr_linter::Chunk;
use crate::linting::expr_linter::find_the_only_token_matching;
use crate::{CharStringExt, Token};

@@ -31,6 +32,8 @@ impl Default for Cant {
}

impl ExprLinter for Cant {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -1,3 +1,4 @@
use crate::linting::expr_linter::Chunk;
use crate::{
    Token,
    expr::{Expr, SequenceExpr},

@@ -27,6 +28,8 @@ impl Default for CautionaryTale {
}

impl ExprLinter for CautionaryTale {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }

@@ -54,10 +57,8 @@ impl ExprLinter for CautionaryTale {

#[cfg(test)]
mod tests {
    use crate::linting::{
        CautionaryTale,
        tests::{assert_lint_count, assert_suggestion_result},
    };
    use super::CautionaryTale;
    use crate::linting::tests::{assert_lint_count, assert_suggestion_result};

    #[test]
    fn catches_cautionary_tail() {
@@ -1,5 +1,6 @@
use crate::Token;
use crate::expr::{Expr, SequenceExpr};
use crate::linting::expr_linter::Chunk;
use crate::linting::{ExprLinter, Lint, LintKind, Suggestion};
use crate::patterns::Word;

@@ -33,6 +34,8 @@ impl Default for ChangeTack {
}

impl ExprLinter for ChangeTack {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }

@@ -61,7 +64,8 @@ impl ExprLinter for ChangeTack {

#[cfg(test)]
mod tests {
    use crate::linting::{ChangeTack, tests::assert_suggestion_result};
    use super::ChangeTack;
    use crate::linting::tests::assert_suggestion_result;

    // Verbs: change tack
@@ -4,6 +4,7 @@ use crate::expr::SpaceOrHyphen;
use crate::{Token, TokenStringExt, patterns::WordSet};

use super::{ExprLinter, Lint, LintKind, Suggestion};
use crate::linting::expr_linter::Chunk;

pub struct ChockFull {
    expr: Box<dyn Expr>,

@@ -23,6 +24,8 @@ impl Default for ChockFull {
}

impl ExprLinter for ChockFull {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -118,7 +118,7 @@ impl Linter for CommaFixes {
#[cfg(test)]
mod tests {
    use super::CommaFixes;
    use crate::linting::tests::{assert_lint_count, assert_suggestion_result};
    use crate::linting::tests::{assert_lint_count, assert_no_lints, assert_suggestion_result};

    #[test]
    fn allows_english_comma_atomic() {

@@ -203,4 +203,12 @@ mod tests {
    fn doesnt_correct_comma_between_non_english_tokens() {
        assert_lint_count("严禁采摘花、 果、叶,挖掘树根、草药!", CommaFixes, 0);
    }

    #[test]
    fn issue_2233() {
        assert_no_lints(
            "In foobar, apple is a fruit, and \"beer\" is not a fruit.",
            CommaFixes,
        );
    }
}
@@ -1,10 +1,13 @@
use crate::expr::All;
use crate::expr::Expr;
use crate::expr::MergeableWords;
use crate::expr::OwnedExprExt;
use crate::expr::SequenceExpr;
use crate::patterns::InflectionOfBe;
use crate::{CharStringExt, TokenStringExt, linting::ExprLinter};

use super::{Lint, LintKind, Suggestion, is_content_word, predicate};
use crate::linting::expr_linter::Chunk;

use crate::{Lrc, Token};

@@ -30,7 +33,7 @@ impl Default for CompoundNounAfterDetAdj {
            .t_ws()
            .then(is_content_word)
            .t_ws()
            .then(is_content_word);
            .then(is_content_word.and_not(InflectionOfBe::default()));

        let split_expr = Lrc::new(MergeableWords::new(|meta_closed, meta_open| {
            predicate(meta_closed, meta_open)

@@ -38,12 +41,7 @@ impl Default for CompoundNounAfterDetAdj {

        let mut expr = All::default();
        expr.add(context_expr);
        expr.add(
            SequenceExpr::default()
                .t_any()
                .t_any()
                .then(split_expr.clone()),
        );
        expr.add(SequenceExpr::anything().t_any().then(split_expr.clone()));

        Self {
            expr: Box::new(expr),

@@ -53,6 +51,8 @@ impl Default for CompoundNounAfterDetAdj {
}

impl ExprLinter for CompoundNounAfterDetAdj {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -6,6 +6,7 @@ use crate::patterns::AnyPattern;
use crate::{CharStringExt, Lrc, TokenStringExt, linting::ExprLinter};

use super::{Lint, LintKind, Suggestion, is_content_word, predicate};
use crate::linting::expr_linter::Chunk;

use crate::Token;

@@ -52,6 +53,8 @@ impl Default for CompoundNounAfterPossessive {
}

impl ExprLinter for CompoundNounAfterPossessive {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -8,6 +8,7 @@ use crate::{CharStringExt, Lrc, TokenStringExt, linting::ExprLinter};
use super::{Lint, LintKind, Suggestion, is_content_word, predicate};

use crate::Token;
use crate::linting::expr_linter::Chunk;

/// Two adjacent words separated by whitespace that if joined would be a valid noun.
pub struct CompoundNounBeforeAuxVerb {

@@ -44,6 +45,8 @@ impl Default for CompoundNounBeforeAuxVerb {
}

impl ExprLinter for CompoundNounBeforeAuxVerb {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -1,3 +1,4 @@
use crate::linting::expr_linter::Chunk;
use crate::{
    Token, TokenKind,
    expr::{AnchorStart, Expr, SequenceExpr},

@@ -41,6 +42,8 @@ impl Default for CompoundSubjectI {
}

impl ExprLinter for CompoundSubjectI {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -1,6 +1,7 @@
use crate::expr::Expr;
use crate::expr::OwnedExprExt;
use crate::expr::SequenceExpr;
use crate::linting::expr_linter::Chunk;
use crate::{Token, patterns::Word};

use super::{ExprLinter, Lint, LintKind, Suggestion};

@@ -28,6 +29,8 @@ impl Default for Confident {
}

impl ExprLinter for Confident {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -3,6 +3,7 @@ use super::expr_linter::ExprLinter;
use crate::expr::Expr;
use crate::expr::SequenceExpr;
use crate::linting::LintKind;
use crate::linting::expr_linter::Chunk;
use crate::patterns::WordSet;
use crate::{Lint, Lrc, Token, TokenStringExt};

@@ -33,6 +34,8 @@ impl CriteriaPhenomena {
}

impl ExprLinter for CriteriaPhenomena {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
147 harper-core/src/linting/cure_for.rs Normal file

@@ -0,0 +1,147 @@
use crate::{
    Span, Token,
    expr::{Expr, SequenceExpr},
    linting::expr_linter::Chunk,
    linting::{ExprLinter, Lint, LintKind, Suggestion},
    patterns::{DerivedFrom, Word},
};

pub struct CureFor {
    expr: Box<dyn Expr>,
}

impl Default for CureFor {
    fn default() -> Self {
        let expr = SequenceExpr::default()
            .then(DerivedFrom::new_from_str("cure"))
            .t_ws()
            .then(Word::new("against"));

        Self {
            expr: Box::new(expr),
        }
    }
}

impl ExprLinter for CureFor {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }

    fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
        let against = matched_tokens.last()?;

        let template: Vec<char> = against.span.get_content(source).to_vec();
        let suggestion = Suggestion::replace_with_match_case_str("for", &template);

        Some(Lint {
            span: Span::new(against.span.start, against.span.end),
            lint_kind: LintKind::Usage,
            suggestions: vec![suggestion],
            message: "Prefer `cure for` when describing a treatment target.".to_owned(),
            priority: 31,
        })
    }

    fn description(&self) -> &str {
        "Flags `cure against` and prefers the standard `cure for` pairing."
    }
}

#[cfg(test)]
mod tests {
    use super::CureFor;
    use crate::linting::tests::{assert_lint_count, assert_suggestion_result};

    #[test]
    fn corrects_simple_cure_against() {
        assert_suggestion_result(
            "Researchers sought a cure against the stubborn illness.",
            CureFor::default(),
            "Researchers sought a cure for the stubborn illness.",
        );
    }

    #[test]
    fn corrects_plural_cures_against() {
        assert_suggestion_result(
            "Doctors insist this serum cures against the new variant.",
            CureFor::default(),
            "Doctors insist this serum cures for the new variant.",
        );
    }

    #[test]
    fn corrects_past_participle_cured_against() {
        assert_suggestion_result(
            "The remedy was cured against the infection last spring.",
            CureFor::default(),
            "The remedy was cured for the infection last spring.",
        );
    }

    #[test]
    fn corrects_uppercase_against() {
        assert_suggestion_result(
            "We still trust the cure AGAINST the dreaded plague.",
            CureFor::default(),
            "We still trust the cure FOR the dreaded plague.",
        );
    }

    #[test]
    fn corrects_at_sentence_start() {
        assert_suggestion_result(
            "Cure against that condition became the rallying cry.",
            CureFor::default(),
            "Cure for that condition became the rallying cry.",
        );
    }

    #[test]
    fn does_not_flag_cure_for() {
        assert_lint_count(
            "They finally found a cure for the fever.",
            CureFor::default(),
            0,
        );
    }

    #[test]
    fn does_not_flag_cure_from() {
        assert_lint_count(
            "A cure from this rare herb is on the horizon.",
            CureFor::default(),
            0,
        );
    }

    #[test]
    fn does_not_flag_with_comma() {
        assert_lint_count(
            "A cure, against all odds, appeared in the files.",
            CureFor::default(),
            0,
        );
    }

    #[test]
    fn does_not_flag_unrelated_against() {
        assert_lint_count(
            "Travelers stand against the roaring wind on the cliffs.",
            CureFor::default(),
            0,
        );
    }

    #[test]
    fn does_not_flag_secure_against() {
        assert_lint_count(
            "The fortress stayed secure against the invaders.",
            CureFor::default(),
            0,
        );
    }
}
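Because `DerivedFrom::new_from_str("cure")` matches any inflection of `cure`, the lint also fires inside larger noun phrases. A sketch of one more case, assuming the same test helpers (hypothetical, not part of this change):

    #[test]
    fn corrects_no_cure_against() {
        // Hypothetical extra case: the noun `cure` followed by `against`
        // is rewritten just like the examples above.
        assert_suggestion_result(
            "There is still no cure against boredom.",
            CureFor::default(),
            "There is still no cure for boredom.",
        );
    }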
@@ -110,7 +110,7 @@ mod tests {
    #[test]
    fn multiple_dollar() {
        assert_suggestion_result(
            "They were either 25$ 24$ or 23$.",
            "They were either 25\\$ 24\\$ or 23\\$.",
            CurrencyPlacement::default(),
            "They were either $25 $24 or $23.",
        );
@@ -4,6 +4,7 @@ use crate::expr::SequenceExpr;
use crate::{Token, TokenStringExt};

use super::{ExprLinter, Lint, LintKind, Suggestion};
use crate::linting::expr_linter::Chunk;

const EN_DASH: char = '–';
const EM_DASH: char = '—';

@@ -29,6 +30,8 @@ impl Default for Dashes {
}

impl ExprLinter for Dashes {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -3,6 +3,7 @@ use crate::expr::SequenceExpr;
use crate::{Token, TokenStringExt};

use super::{ExprLinter, Lint, LintKind, Suggestion};
use crate::linting::expr_linter::Chunk;

pub struct DespiteOf {
    expr: Box<dyn Expr>,

@@ -21,6 +22,8 @@ impl Default for DespiteOf {
}

impl ExprLinter for DespiteOf {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -3,6 +3,7 @@ use crate::expr::SequenceExpr;
use crate::{Token, TokenStringExt};

use super::{ExprLinter, Lint, LintKind, Suggestion};
use crate::linting::expr_linter::Chunk;

pub struct DeterminerWithoutNoun {
    expr: Box<dyn Expr>,

@@ -11,7 +12,7 @@ pub struct DeterminerWithoutNoun {
impl Default for DeterminerWithoutNoun {
    fn default() -> Self {
        let expr = SequenceExpr::default()
            .then(|tok: &Token, _: &[char]| tok.kind.is_determiner())
            .then_kind_where(|kind| kind.is_determiner())
            .t_ws()
            .then_conjunction();

@@ -22,6 +23,8 @@ impl Default for DeterminerWithoutNoun {
}

impl ExprLinter for DeterminerWithoutNoun {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
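The change in this hunk swaps an inline token closure for `then_kind_where`, which takes a predicate over the `TokenKind` alone and drops the unused source parameter. Both forms appear in this diff; a crate-internal sketch of the contrast, shown only for comparison:

    // Before: closure over the whole token; the `&[char]` source goes unused.
    let old_style = SequenceExpr::default()
        .then(|tok: &Token, _: &[char]| tok.kind.is_determiner());

    // After: predicate over the kind alone.
    let new_style = SequenceExpr::default().then_kind_where(|kind| kind.is_determiner());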
@@ -1,5 +1,6 @@
use crate::Token;
use crate::expr::{Expr, SequenceExpr};
use crate::linting::expr_linter::Chunk;
use crate::linting::{ExprLinter, Lint, LintKind, Suggestion};

pub struct Didnt {

@@ -9,7 +10,7 @@ pub struct Didnt {
impl Default for Didnt {
    fn default() -> Self {
        let pattern = SequenceExpr::default()
            .then_personal_pronoun()
            .then_subject_pronoun()
            .t_ws()
            .t_aco("dint");

@@ -20,6 +21,8 @@ impl Default for Didnt {
}

impl ExprLinter for Didnt {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
206 harper-core/src/linting/disjoint_prefixes.rs Normal file

@@ -0,0 +1,206 @@
use crate::{
    Lint, Token, TokenKind, TokenStringExt,
    expr::{Expr, OwnedExprExt, SequenceExpr},
    linting::{ExprLinter, LintKind, Suggestion, expr_linter::Chunk},
    spell::Dictionary,
};

pub struct DisjointPrefixes<D> {
    expr: Box<dyn Expr>,
    dict: D,
}

// Known false positives not to join to these prefixes:
const OUT_EXCEPTIONS: &[&str] = &["boxes", "facing", "live", "numbers", "playing"];
const OVER_EXCEPTIONS: &[&str] = &["all", "joy", "long", "night", "reading", "steps", "time"];
const UNDER_EXCEPTIONS: &[&str] = &["development", "mine"];
const UP_EXCEPTIONS: &[&str] = &["loading", "right", "state", "time", "trend"];

impl<D> DisjointPrefixes<D>
where
    D: Dictionary,
{
    pub fn new(dict: D) -> Self {
        Self {
            expr: Box::new(
                SequenceExpr::word_set(&[
                    // These prefixes rarely cause false positives
                    "anti", "auto", "bi", "counter", "de", "dis", "extra", "fore", "hyper", "il",
                    "im", "inter", "ir", "macro", "mal", "micro", "mid", "mini", "mis", "mono",
                    "multi", "non", "omni", "post", "pre", "pro", "re", "semi", "sub", "super",
                    "trans", "tri", "ultra", "un", "uni",
                    // "co" has one very common false positive: co-op != coop
                    "co",
                    // These prefixes are all also words in their own right, which leads to more false positives.
                    "out", "over", "under", "up",
                    // These prefixes are commented out due to too many false positives
                    // or incorrect transformations:
                    // "a": a live -> alive
                    // "in": in C -> inc; in action -> inaction
                ])
                .t_ws_h()
                .then_kind_either(TokenKind::is_verb, TokenKind::is_noun)
                .then_optional_hyphen()
                .and_not(SequenceExpr::any_of(vec![
                    // No trailing hyphen. Ex: Custom patterns take precedence over built-in patterns -> overbuilt
                    Box::new(SequenceExpr::anything().t_any().t_any().then_hyphen()),
                    // Don't merge "co op" whether separated by space or hyphen.
                    Box::new(SequenceExpr::aco("co").t_any().t_set(&["op", "ops"])),
                    // Merge these if they're separated by hyphen, but not space.
                    Box::new(SequenceExpr::aco("out").t_ws().t_set(OUT_EXCEPTIONS)),
                    Box::new(SequenceExpr::aco("over").t_ws().t_set(OVER_EXCEPTIONS)),
                    Box::new(SequenceExpr::aco("under").t_ws().t_set(UNDER_EXCEPTIONS)),
                    Box::new(SequenceExpr::aco("up").t_ws().t_set(UP_EXCEPTIONS)),
                ])),
            ),
            dict,
        }
    }
}

impl<D> ExprLinter for DisjointPrefixes<D>
where
    D: Dictionary,
{
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }

    fn match_to_lint_with_context(
        &self,
        toks: &[Token],
        src: &[char],
        ctx: Option<(&[Token], &[Token])>,
    ) -> Option<Lint> {
        let toks_span = toks.span()?;
        let (pre, _) = ctx?;

        // Cloud Native Pub-Sub System at Pinterest -> subsystem
        if pre.last().is_some_and(|p| p.kind.is_hyphen()) {
            return None;
        }

        // Avoid including text from unlintable sections between tokens
        // that could result from naively using toks.span()?.get_content_string(src)
        let original = format!(
            "{}{}{}",
            toks[0].span.get_content_string(src),
            if toks[1].kind.is_hyphen() { '-' } else { ' ' },
            toks[2].span.get_content_string(src)
        );

        // If the original form is in the dictionary, return None
        if self.dict.contains_word_str(&original) {
            return None;
        }

        let mut hyphenated = None;
        if !toks[1].kind.is_hyphen() {
            hyphenated = Some(format!(
                "{}-{}",
                toks[0].span.get_content_string(src),
                toks[2].span.get_content_string(src)
            ));
        }
        let joined = Some(format!(
            "{}{}",
            toks[0].span.get_content_string(src),
            toks[2].span.get_content_string(src)
        ));

        // Check if either joined or hyphenated form is in the dictionary
        let joined_valid = joined
            .as_ref()
            .is_some_and(|j| self.dict.contains_word_str(j));
        let hyphenated_valid = hyphenated
            .as_ref()
            .is_some_and(|h| self.dict.contains_word_str(h));

        if !joined_valid && !hyphenated_valid {
            return None;
        }

        // Joining with a hyphen when original is separated by space is more likely correct
        // if hyphenated form is in the dictionary. So add first if verified.
        // Joining when separated by a space is more common but also has more false positives, so add them second.
        let suggestions = [(&hyphenated, hyphenated_valid), (&joined, joined_valid)]
            .into_iter()
            .filter_map(|(word, is_valid)| word.as_ref().filter(|_| is_valid))
            .collect::<Vec<_>>();

        let suggestions = suggestions
            .iter()
            .map(|s| {
                Suggestion::replace_with_match_case(s.chars().collect(), toks_span.get_content(src))
            })
            .collect();

        Some(Lint {
            span: toks_span,
            lint_kind: LintKind::Spelling,
            suggestions,
            message: "This looks like a prefix that can be joined with the rest of the word."
                .to_string(),
            ..Default::default()
        })
    }

    fn description(&self) -> &str {
        "Looks for words with their prefixes written with a space or hyphen between instead of joined."
    }
}

#[cfg(test)]
mod tests {
    use super::DisjointPrefixes;
    use crate::{
        linting::tests::{assert_no_lints, assert_suggestion_result},
        spell::FstDictionary,
    };

    #[test]
    fn fix_hyphenated_to_joined() {
        assert_suggestion_result(
            "Download pre-built binaries or build from source.",
            DisjointPrefixes::new(FstDictionary::curated()),
            "Download prebuilt binaries or build from source.",
        );
    }

    #[test]
    fn fix_open_to_joined() {
        assert_suggestion_result(
            "Advanced Nginx configuration available for super users",
            DisjointPrefixes::new(FstDictionary::curated()),
            "Advanced Nginx configuration available for superusers",
        );
    }

    #[test]
    fn dont_join_open_co_op() {
        assert_no_lints(
            "They are cheaper at the co op.",
            DisjointPrefixes::new(FstDictionary::curated()),
        );
    }

    #[test]
    fn dont_join_hyphenated_co_op() {
        assert_no_lints(
            "Almost everything is cheaper at the co-op.",
            DisjointPrefixes::new(FstDictionary::curated()),
        );
    }

    #[test]
    fn fix_open_to_hyphenated() {
        assert_suggestion_result(
            "My hobby is de extinction of the dinosaurs.",
            DisjointPrefixes::new(FstDictionary::curated()),
            "My hobby is de-extinction of the dinosaurs.",
        );
    }
}
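The suggestion ordering above prefers the hyphenated form when the dictionary contains it, since inserting a hyphen is the more conservative join. A standalone sketch of that gating (hypothetical helper, distilling the filter in match_to_lint_with_context):

    // Hypothetical distillation of the dictionary-gated suggestion ordering.
    fn ordered_suggestions(
        hyphenated: Option<String>,
        joined: Option<String>,
        in_dict: impl Fn(&str) -> bool,
    ) -> Vec<String> {
        [hyphenated, joined]
            .into_iter()
            .flatten()
            .filter(|w| in_dict(w))
            .collect()
    }

    fn main() {
        let dict = |w: &str| matches!(w, "de-extinction" | "prebuilt" | "pre-built");
        // Only the hyphenated form is a word: one suggestion.
        assert_eq!(
            ordered_suggestions(Some("de-extinction".into()), Some("deextinction".into()), dict),
            vec!["de-extinction".to_string()]
        );
        // Both forms are words: hyphenated first.
        assert_eq!(
            ordered_suggestions(Some("pre-built".into()), Some("prebuilt".into()), dict),
            vec!["pre-built".to_string(), "prebuilt".to_string()]
        );
    }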
@@ -4,6 +4,7 @@ use crate::expr::WordExprGroup;
use hashbrown::HashMap;

use super::{ExprLinter, Lint, LintKind, Suggestion};
use crate::linting::expr_linter::Chunk;
use crate::{Token, TokenStringExt};

pub struct DotInitialisms {

@@ -35,6 +36,8 @@ impl Default for DotInitialisms {
}

impl ExprLinter for DotInitialisms {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -1,5 +1,6 @@
use std::sync::Arc;

use crate::linting::expr_linter::Chunk;
use crate::{
    Token, TokenKind, TokenStringExt,
    expr::{Expr, ExprMap, SequenceExpr},

@@ -57,6 +58,8 @@ impl Default for DoubleClick {
}

impl ExprLinter for DoubleClick {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -5,6 +5,7 @@ use crate::{Token, TokenStringExt};

use super::Suggestion;
use super::{ExprLinter, Lint, LintKind};
use crate::linting::expr_linter::Chunk;

pub struct DoubleModal {
    expr: Box<dyn Expr>,

@@ -24,6 +25,8 @@ impl Default for DoubleModal {
}

impl ExprLinter for DoubleModal {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
@@ -1,6 +1,7 @@
use crate::expr::Expr;
use crate::expr::OwnedExprExt;
use crate::expr::SequenceExpr;
use crate::linting::expr_linter::Chunk;
use crate::{
    Token,
    linting::{ExprLinter, Lint, LintKind, Suggestion},

@@ -36,6 +37,8 @@ impl Default for ElsePossessive {
}

impl ExprLinter for ElsePossessive {
    type Unit = Chunk;

    fn expr(&self) -> &dyn Expr {
        self.expr.as_ref()
    }
230 harper-core/src/linting/ever_every.rs Normal file

@@ -0,0 +1,230 @@
use crate::{
|
||||
Lint, Token,
|
||||
expr::{Expr, OwnedExprExt, SequenceExpr},
|
||||
linting::{ExprLinter, LintKind, Suggestion, expr_linter::Chunk},
|
||||
patterns::{ModalVerb, WordSet},
|
||||
};
|
||||
|
||||
pub struct EverEvery {
|
||||
expr: Box<dyn Expr>,
|
||||
}
|
||||
|
||||
impl Default for EverEvery {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
expr: Box::new(
|
||||
SequenceExpr::any_of(vec![
|
||||
Box::new(WordSet::new(&[
|
||||
"are", "aren't", "arent", "did", "didn't", "didnt", "do", "does",
|
||||
"doesn't", "doesnt", "dont", "don't", "had", "hadn't", "hadnt", "has",
|
||||
"hasn't", "hasnt", "have", "haven't", "havent", "is", "isn't", "isnt",
|
||||
"was", "wasn't", "wasnt", "were", "weren't", "werent",
|
||||
])),
|
||||
Box::new(ModalVerb::with_common_errors()),
|
||||
])
|
||||
.t_ws()
|
||||
.then_subject_pronoun()
|
||||
.t_ws()
|
||||
.t_aco("every")
|
||||
.and_not(SequenceExpr::anything().t_any().t_aco("it")),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprLinter for EverEvery {
|
||||
type Unit = Chunk;
|
||||
|
||||
fn expr(&self) -> &dyn Expr {
|
||||
self.expr.as_ref()
|
||||
}
|
||||
|
||||
fn match_to_lint(&self, toks: &[Token], src: &[char]) -> Option<Lint> {
|
||||
let span = toks[4].span;
|
||||
let content = span.get_content(src);
|
||||
Some(Lint {
|
||||
span,
|
||||
lint_kind: LintKind::Typo,
|
||||
suggestions: vec![Suggestion::replace_with_match_case(
|
||||
content[..content.len() - 1].to_vec(),
|
||||
content,
|
||||
)],
|
||||
message: "Is this `every` a typo that should be `ever`?".to_string(),
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Tries to correct typos of `every` instead of `ever`."
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::EverEvery;
|
||||
use crate::linting::tests::{assert_no_lints, assert_suggestion_result};
|
||||
|
||||
#[test]
|
||||
fn fix_can_i_every() {
|
||||
assert_suggestion_result(
|
||||
"Odd, how can i every become negative in that case?",
|
||||
EverEvery::default(),
|
||||
"Odd, how can i ever become negative in that case?",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_can_they_every() {
|
||||
assert_suggestion_result(
|
||||
"if each component has its own instance of NameService, how can they every share state?",
|
||||
EverEvery::default(),
|
||||
"if each component has its own instance of NameService, how can they ever share state?",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_can_we_every() {
|
||||
assert_suggestion_result(
|
||||
"can we every have a good dev UX?",
|
||||
EverEvery::default(),
|
||||
"can we ever have a good dev UX?",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_did_we_every() {
|
||||
assert_suggestion_result(
|
||||
"Did we every fix that?",
|
||||
EverEvery::default(),
|
||||
"Did we ever fix that?",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_did_you_every() {
|
||||
assert_suggestion_result(
|
||||
"Did you every get vtsls working properly?",
|
||||
EverEvery::default(),
|
||||
"Did you ever get vtsls working properly?",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_do_i_every() {
|
||||
assert_suggestion_result(
|
||||
"Rarely do I every look forward to the new ui.",
|
||||
EverEvery::default(),
|
||||
"Rarely do I ever look forward to the new ui.",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_do_we_every() {
|
||||
assert_suggestion_result(
|
||||
"do we every stop learning new things?",
|
||||
EverEvery::default(),
|
||||
"do we ever stop learning new things?",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_do_you_every() {
|
||||
assert_suggestion_result(
|
||||
"Do you every faced the issue or have any idea why this could happen?",
|
||||
EverEvery::default(),
|
||||
"Do you ever faced the issue or have any idea why this could happen?",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_dont_i_every() {
|
||||
assert_suggestion_result(
|
||||
"WHY DONT I EVERY SEE OR HEAR ABOUT THINGS HAPPENING IN SOUTHPORT?",
|
||||
EverEvery::default(),
|
||||
"WHY DONT I EVER SEE OR HEAR ABOUT THINGS HAPPENING IN SOUTHPORT?",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_dont_they_every() {
|
||||
assert_suggestion_result(
|
||||
"And why dont they every smile first?",
|
||||
EverEvery::default(),
|
||||
"And why dont they ever smile first?",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_dont_you_every() {
|
||||
assert_suggestion_result(
|
||||
"Dont you every forget this and believe nothing else.",
|
||||
EverEvery::default(),
|
||||
"Dont you ever forget this and believe nothing else.",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_have_you_every() {
|
||||
assert_suggestion_result(
|
||||
"Have you every wanted to generate geometric structures from data.frames",
|
||||
EverEvery::default(),
|
||||
"Have you ever wanted to generate geometric structures from data.frames",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_should_i_every() {
|
||||
assert_suggestion_result(
|
||||
"I.e. why would I every use deepcopy ?",
|
||||
EverEvery::default(),
|
||||
"I.e. why would I ever use deepcopy ?",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_should_we_every() {
|
||||
assert_suggestion_result(
|
||||
"Should we every meet, I'll get you a beverage of your choosing!",
|
||||
EverEvery::default(),
|
||||
"Should we ever meet, I'll get you a beverage of your choosing!",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_should_you_every() {
|
||||
assert_suggestion_result(
|
||||
"but you will always have a place in his home should you every truly desire it",
|
||||
EverEvery::default(),
|
||||
"but you will always have a place in his home should you ever truly desire it",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_would_i_every() {
|
||||
assert_suggestion_result(
|
||||
"Why would I every do that?",
|
||||
EverEvery::default(),
|
||||
"Why would I ever do that?",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fix_would_they_every() {
|
||||
assert_suggestion_result(
|
||||
"Would they every be installed together?",
|
||||
EverEvery::default(),
|
||||
"Would they ever be installed together?",
|
||||
)
|
||||
}
|
||||
|
||||
// known false positive - future contributors: please feel free to tackle this!
|
||||
|
||||
#[test]
|
||||
#[ignore = "unusual but not wrong position of time phrase, maybe should have commas?"]
|
||||
fn dont_flag_should_we_every() {
|
||||
assert_no_lints(
|
||||
"MM: should we every month or two have a roundup of what's been happening in WGSL",
|
||||
EverEvery::default(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
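
For orientation, a sketch of running the new rule by hand rather than through the test helpers. `Document::new_plain_english_curated` and the `harper_core::linting` re-export of `EverEvery` are assumptions that follow the crate's usual pattern; `.lint()` comes from the blanket `Linter` impl shown later in this diff:

    use harper_core::Document;
    use harper_core::linting::{EverEvery, Linter};

    fn main() {
        let doc = Document::new_plain_english_curated("Did we every fix that?");
        let mut linter = EverEvery::default();
        for lint in linter.lint(&doc) {
            // Prints: Is this `every` a typo that should be `ever`?
            println!("{}", lint.message);
        }
    }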

@@ -3,6 +3,7 @@ use crate::expr::All;
 use crate::expr::Expr;
 use crate::expr::LongestMatchOf;
 use crate::expr::SequenceExpr;
+use crate::linting::expr_linter::Chunk;
 use crate::{Lrc, Punctuation, Token, TokenStringExt, patterns::Word};
 
 pub struct Everyday {

@@ -12,24 +13,19 @@ pub struct Everyday {
 impl Default for Everyday {
     fn default() -> Self {
         let everyday = Word::new("everyday");
-        let every_day = Lrc::new(SequenceExpr::default().t_aco("every").t_ws().t_aco("day"));
+        let every_day = Lrc::new(SequenceExpr::aco("every").t_ws().t_aco("day"));
 
-        let everyday_bad_after =
-            All::new(vec![
-                Box::new(
-                    SequenceExpr::default()
-                        .then(everyday.clone())
-                        .t_ws()
-                        .then_any_word(),
-                ),
-                Box::new(SequenceExpr::default().t_any().t_any().then(
-                    |tok: &Token, _src: &[char]| {
-                        !tok.kind.is_noun()
-                            && !tok.kind.is_oov()
-                            && !tok.kind.is_verb_progressive_form()
-                    },
-                )),
-            ]);
+        let everyday_bad_after = All::new(vec![
+            Box::new(
+                SequenceExpr::default()
+                    .then(everyday.clone())
+                    .t_ws()
+                    .then_any_word(),
+            ),
+            Box::new(SequenceExpr::anything().t_any().then_kind_where(|kind| {
+                !kind.is_noun() && !kind.is_oov() && !kind.is_verb_progressive_form()
+            })),
+        ]);
 
         let bad_before_every_day = All::new(vec![
             Box::new(

@@ -55,8 +51,7 @@ impl Default for Everyday {
                     .then_any_word(),
             ),
             Box::new(
-                SequenceExpr::default()
-                    .t_any()
+                SequenceExpr::anything()
                     .t_any()
                     .then_kind_both(TokenKind::is_noun, TokenKind::is_verb)
                     .t_any()

@@ -71,18 +66,14 @@ impl Default for Everyday {
                     .then(everyday.clone())
                     .then_punctuation(),
             ),
-            Box::new(
-                SequenceExpr::default()
-                    .t_any()
-                    .then(|tok: &Token, _src: &[char]| {
-                        matches!(
-                            tok.kind,
-                            TokenKind::Punctuation(
-                                Punctuation::Question | Punctuation::Comma | Punctuation::Period
-                            )
-                        )
-                    }),
-            ),
+            Box::new(SequenceExpr::anything().then_kind_where(|kind| {
+                matches!(
+                    kind,
+                    TokenKind::Punctuation(
+                        Punctuation::Question | Punctuation::Comma | Punctuation::Period
+                    )
+                )
+            })),
         ]);
 
         // (However, the message goes far beyond) every day things.

@@ -91,19 +82,18 @@ impl Default for Everyday {
                 SequenceExpr::default()
                     .then(every_day.clone())
                     .t_ws()
-                    .then_noun()
+                    .then_plural_noun()
                     .then_punctuation(),
             ),
             Box::new(
-                SequenceExpr::default()
-                    .t_any()
+                SequenceExpr::anything()
                     .t_any()
                     .t_any()
                     .t_any()
                     .t_any()
-                    .then(|tok: &Token, _src: &[char]| {
+                    .then_kind_where(|kind| {
                         matches!(
-                            tok.kind,
+                            kind,
                             TokenKind::Punctuation(
                                 Punctuation::Question | Punctuation::Comma | Punctuation::Period
                             )
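
A note on the refactor pattern running through these hunks: judging from the removed and added lines alone, `SequenceExpr::anything()` stands in for `SequenceExpr::default().t_any()`, and `then_kind_where` replaces the `|tok: &Token, _src: &[char]|` closures with a predicate over the `TokenKind` alone. A minimal side-by-side sketch:

    fn sketch() {
        // Removed form: a predicate over the whole token (source unused).
        let _old = SequenceExpr::default()
            .t_any()
            .then(|tok: &Token, _src: &[char]| !tok.kind.is_noun());

        // Added form: the same condition, stated on the kind directly.
        let _new = SequenceExpr::anything().then_kind_where(|kind| !kind.is_noun());
    }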

@@ -147,6 +137,8 @@ impl Default for Everyday {
 }
 
 impl ExprLinter for Everyday {
+    type Unit = Chunk;
+
     fn expr(&self) -> &dyn Expr {
         self.expr.as_ref()
     }

@@ -215,7 +207,7 @@ impl ExprLinter for Everyday {
 mod tests {
     use super::Everyday;
     use crate::linting::tests::{
-        assert_lint_count, assert_suggestion_result, assert_top3_suggestion_result,
+        assert_lint_count, assert_no_lints, assert_suggestion_result, assert_top3_suggestion_result,
     };
 
     #[test]

@@ -506,4 +498,9 @@ mod tests {
             "MEET SOMEONE NEW EVERY DAY.",
         );
     }
+
+    #[test]
+    fn dont_flag_every_day_singular_noun_2020() {
+        assert_no_lints("50 requests per day, every day free.", Everyday::default());
+    }
 }

@@ -4,6 +4,7 @@ use super::{ExprLinter, Lint, LintKind};
 use crate::Token;
 use crate::expr::{Expr, SequenceExpr, SpaceOrHyphen};
 use crate::linting::Suggestion;
+use crate::linting::expr_linter::Chunk;
 use crate::patterns::{ImpliesQuantity, WordSet};
 
 pub struct ExpandMemoryShorthands {

@@ -74,6 +75,8 @@ impl Default for ExpandMemoryShorthands {
 }
 
 impl ExprLinter for ExpandMemoryShorthands {
+    type Unit = Chunk;
+
     fn expr(&self) -> &dyn Expr {
         self.expr.as_ref()
     }

@@ -6,6 +6,7 @@ use std::sync::Arc;
 use super::{ExprLinter, Lint, LintKind};
 use crate::Token;
 use crate::linting::Suggestion;
+use crate::linting::expr_linter::Chunk;
 use crate::patterns::{ImpliesQuantity, WordSet};
 
 pub struct ExpandTimeShorthands {

@@ -57,6 +58,8 @@ impl Default for ExpandTimeShorthands {
 }
 
 impl ExprLinter for ExpandTimeShorthands {
+    type Unit = Chunk;
+
     fn expr(&self) -> &dyn Expr {
         self.expr.as_ref()
     }

@@ -5,19 +5,71 @@ use crate::{Document, LSend, Token, TokenStringExt};
 
 use super::{Lint, Linter};
 
+pub trait DocumentIterator {
+    type Unit;
+
+    fn iter_units<'a>(document: &'a Document) -> Box<dyn Iterator<Item = &'a [Token]> + 'a>;
+}
+
+/// Process text in chunks (clauses between commas)
+pub struct Chunk;
+/// Process text in full sentences
+pub struct Sentence;
+
+impl DocumentIterator for Chunk {
+    type Unit = Chunk;
+
+    fn iter_units<'a>(document: &'a Document) -> Box<dyn Iterator<Item = &'a [Token]> + 'a> {
+        Box::new(document.iter_chunks())
+    }
+}
+
+impl DocumentIterator for Sentence {
+    type Unit = Sentence;
+
+    fn iter_units<'a>(document: &'a Document) -> Box<dyn Iterator<Item = &'a [Token]> + 'a> {
+        Box::new(document.iter_sentences())
+    }
+}
+
 /// A trait that searches for tokens that fulfil [`Expr`]s in a [`Document`].
 ///
-/// Makes use of [`TokenStringExt::iter_chunks`] to avoid matching across sentence or clause
-/// boundaries.
+/// Makes use of [`TokenStringExt::iter_chunks`] by default, or [`TokenStringExt::iter_sentences`] to process either
+/// a chunk (clause) or a sentence at a time.
 #[blanket(derive(Box))]
 pub trait ExprLinter: LSend {
+    type Unit: DocumentIterator;
+
     /// A simple getter for the expression you want Harper to search for.
     fn expr(&self) -> &dyn Expr;
-    /// If any portions of a [`Document`] match [`Self::expr`], they are passed through [`ExprLinter::match_to_lint`] to be
-    /// transformed into a [`Lint`] for editor consumption.
+    /// If any portions of a [`Document`] match [`Self::expr`], they are passed through [`ExprLinter::match_to_lint`]
+    /// or [`ExprLinter::match_to_lint_with_context`] to be transformed into a [`Lint`] for editor consumption.
     ///
-    /// This function may return `None` to elect _not_ to produce a lint.
-    fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint>;
+    /// Transform matched tokens into a [`Lint`] for editor consumption.
+    ///
+    /// This is the simple version that only sees the matched tokens. For context-aware linting,
+    /// implement `match_to_lint_with_context` instead.
+    ///
+    /// Return `None` to skip producing a lint for this match.
+    fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Option<Lint> {
+        self.match_to_lint_with_context(matched_tokens, source, None)
+    }
+
+    /// Transform matched tokens into a [`Lint`] with access to surrounding context.
+    ///
+    /// The context provides access to tokens before and after the match. When implementing
+    /// this method, you can call `self.match_to_lint()` as a fallback if the context isn't needed.
+    ///
+    /// Return `None` to skip producing a lint for this match.
+    fn match_to_lint_with_context(
+        &self,
+        matched_tokens: &[Token],
+        source: &[char],
+        _context: Option<(&[Token], &[Token])>,
+    ) -> Option<Lint> {
+        // Default implementation falls back to the simple version
+        self.match_to_lint(matched_tokens, source)
+    }
     /// A user-facing description of what kinds of grammatical errors this rule looks for.
     /// It is usually shown in settings menus.
     fn description(&self) -> &str;
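
Note how the two default methods above call each other: `match_to_lint` forwards to `match_to_lint_with_context(.., None)`, whose own default forwards straight back. A rule therefore has to override at least one of the two, or the pair would recurse without end. A hedged sketch of the context-aware side (`AfterSomething` is a placeholder; `toks.span()` needs `TokenStringExt` in scope, as in the tests below):

    impl ExprLinter for AfterSomething {
        type Unit = Chunk;

        fn expr(&self) -> &dyn Expr {
            self.expr.as_ref()
        }

        // Overriding only the context-aware method is enough: the default
        // `match_to_lint` forwards here with `context == None`.
        fn match_to_lint_with_context(
            &self,
            toks: &[Token],
            _src: &[char],
            context: Option<(&[Token], &[Token])>,
        ) -> Option<Lint> {
            let (before, _after) = context?;
            if before.is_empty() {
                return None; // only lint when something precedes the match
            }
            Some(Lint {
                span: toks.span()?,
                message: "matched with leading context".to_string(),
                ..Default::default()
            })
        }

        fn description(&self) -> &str {
            "placeholder context-aware rule"
        }
    }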

@@ -42,16 +94,17 @@
     }
 }
 
-impl<L> Linter for L
+impl<L, U> Linter for L
 where
-    L: ExprLinter,
+    L: ExprLinter<Unit = U>,
+    U: DocumentIterator,
 {
     fn lint(&mut self, document: &Document) -> Vec<Lint> {
         let mut lints = Vec::new();
         let source = document.get_source();
 
-        for chunk in document.iter_chunks() {
-            lints.extend(run_on_chunk(self, chunk, source));
+        for unit in U::iter_units(document) {
+            lints.extend(run_on_chunk(self, unit, source));
         }
 
         lints
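
An observation on the generics (not something the diff itself states): the parameter `U` exists only to give `L::Unit` a name in the `where` clause. Since `ExprLinter::Unit` is already bounded by `DocumentIterator`, the same loop could be written without it using fully qualified syntax:

    for unit in <L::Unit as DocumentIterator>::iter_units(document) {
        lints.extend(run_on_chunk(self, unit, source));
    }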

@@ -64,13 +117,183 @@
 
 pub fn run_on_chunk<'a>(
     linter: &'a impl ExprLinter,
-    chunk: &'a [Token],
+    unit: &'a [Token],
     source: &'a [char],
 ) -> impl Iterator<Item = Lint> + 'a {
     linter
         .expr()
-        .iter_matches(chunk, source)
+        .iter_matches(unit, source)
         .filter_map(|match_span| {
-            linter.match_to_lint(&chunk[match_span.start..match_span.end], source)
+            linter.match_to_lint_with_context(
+                &unit[match_span.start..match_span.end],
+                source,
+                Some((&unit[..match_span.start], &unit[match_span.end..])),
+            )
         })
 }
+
+#[cfg(test)]
+mod tests_context {
+    use crate::expr::{Expr, FixedPhrase};
+    use crate::linting::expr_linter::{Chunk, Sentence};
+    use crate::linting::tests::assert_suggestion_result;
+    use crate::linting::{ExprLinter, Suggestion};
+    use crate::token_string_ext::TokenStringExt;
+    use crate::{Lint, Token};
+
+    pub struct TestSimpleLinter {
+        expr: Box<dyn Expr>,
+    }
+
+    impl Default for TestSimpleLinter {
+        fn default() -> Self {
+            Self {
+                expr: Box::new(FixedPhrase::from_phrase("two")),
+            }
+        }
+    }
+
+    impl ExprLinter for TestSimpleLinter {
+        type Unit = Chunk;
+
+        fn expr(&self) -> &dyn Expr {
+            &*self.expr
+        }
+
+        fn match_to_lint(&self, toks: &[Token], _src: &[char]) -> Option<Lint> {
+            Some(Lint {
+                span: toks.span()?,
+                message: "simple".to_string(),
+                suggestions: vec![Suggestion::ReplaceWith(vec!['2'])],
+                ..Default::default()
+            })
+        }
+
+        fn description(&self) -> &str {
+            "test linter"
+        }
+    }
+
+    pub struct TestContextLinter {
+        expr: Box<dyn Expr>,
+    }
+
+    impl Default for TestContextLinter {
+        fn default() -> Self {
+            Self {
+                expr: Box::new(FixedPhrase::from_phrase("two")),
+            }
+        }
+    }
+
+    impl ExprLinter for TestContextLinter {
+        type Unit = Chunk;
+
+        fn expr(&self) -> &dyn Expr {
+            &*self.expr
+        }
+
+        fn match_to_lint_with_context(
+            &self,
+            toks: &[Token],
+            src: &[char],
+            context: Option<(&[Token], &[Token])>,
+        ) -> Option<Lint> {
+            if let Some((before, after)) = context {
+                let before = before.span()?.get_content_string(src);
+                let after = after.span()?.get_content_string(src);
+
+                let (message, suggestions) = if before.eq_ignore_ascii_case("one ")
+                    && after.eq_ignore_ascii_case(" three")
+                {
+                    (
+                        "ascending".to_string(),
+                        vec![Suggestion::ReplaceWith(vec!['>'])],
+                    )
+                } else if before.eq_ignore_ascii_case("three ")
+                    && after.eq_ignore_ascii_case(" one")
+                {
+                    (
+                        "descending".to_string(),
+                        vec![Suggestion::ReplaceWith(vec!['<'])],
+                    )
+                } else {
+                    (
+                        "dunno".to_string(),
+                        vec![Suggestion::ReplaceWith(vec!['?'])],
+                    )
+                };
+
+                return Some(Lint {
+                    span: toks.span()?,
+                    message,
+                    suggestions,
+                    ..Default::default()
+                });
+            } else {
+                None
+            }
+        }
+
+        fn description(&self) -> &str {
+            "context linter"
+        }
+    }
+
+    pub struct TestSentenceLinter {
+        expr: Box<dyn Expr>,
+    }
+
+    impl Default for TestSentenceLinter {
+        fn default() -> Self {
+            Self {
+                expr: Box::new(FixedPhrase::from_phrase("two, two")),
+            }
+        }
+    }
+
+    impl ExprLinter for TestSentenceLinter {
+        type Unit = Sentence;
+
+        fn expr(&self) -> &dyn Expr {
+            self.expr.as_ref()
+        }
+
+        fn match_to_lint(&self, toks: &[Token], _src: &[char]) -> Option<Lint> {
+            Some(Lint {
+                span: toks.span()?,
+                message: "sentence".to_string(),
+                suggestions: vec![Suggestion::ReplaceWith(vec!['2', '&', '2'])],
+                ..Default::default()
+            })
+        }
+
+        fn description(&self) -> &str {
+            "sentence linter"
+        }
+    }
+
+    #[test]
+    fn simple_test_123() {
+        assert_suggestion_result("one two three", TestSimpleLinter::default(), "one 2 three");
+    }
+
+    #[test]
+    fn context_test_123() {
+        assert_suggestion_result("one two three", TestContextLinter::default(), "one > three");
+    }
+
+    #[test]
+    fn context_test_321() {
+        assert_suggestion_result("three two one", TestContextLinter::default(), "three < one");
+    }
+
+    #[test]
+    fn sentence_test_123() {
+        assert_suggestion_result(
+            "one, two, two, three",
+            TestSentenceLinter::default(),
+            "one, 2&2, three",
+        );
+    }
+}
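
Read together, the tests above show the practical difference between the two units: a `Chunk` linter never sees across a comma, so the phrase `two, two` is only matchable when a rule declares `type Unit = Sentence;`. A hedged sketch in the same test module (the lint counts are my inference from the expressions above, not asserted by this diff):

    #[test]
    fn unit_changes_visibility() {
        use crate::Document;
        use crate::linting::Linter;

        let doc = Document::new_plain_english_curated("one, two, two, three");

        // Unit = Chunk: each clause is scanned alone, so "two" matches twice.
        assert_eq!(TestSimpleLinter::default().lint(&doc).len(), 2);
        // Unit = Sentence: "two, two" spans the comma and matches once.
        assert_eq!(TestSentenceLinter::default().lint(&doc).len(), 1);
    }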

126 harper-core/src/linting/far_be_it.rs Normal file
@@ -0,0 +1,126 @@
+use crate::char_string::CharStringExt;
+use crate::expr::{Expr, SequenceExpr};
+use crate::linting::expr_linter::Chunk;
+use crate::linting::{ExprLinter, Lint, LintKind, Suggestion};
+use crate::token::Token;
+
+pub struct FarBeIt {
+    expr: Box<dyn Expr>,
+}
+
+impl Default for FarBeIt {
+    fn default() -> Self {
+        Self {
+            expr: Box::new(
+                SequenceExpr::default()
+                    .t_aco("far")
+                    .t_ws()
+                    .t_aco("be")
+                    .t_ws()
+                    .t_aco("it")
+                    .t_ws()
+                    .then_word_except(&["from"]),
+            ),
+        }
+    }
+}
+
+impl ExprLinter for FarBeIt {
+    type Unit = Chunk;
+
+    fn expr(&self) -> &dyn Expr {
+        self.expr.as_ref()
+    }
+
+    fn match_to_lint(&self, toks: &[Token], src: &[char]) -> Option<Lint> {
+        let span = toks.last()?.span;
+        let content = span.get_content(src);
+
+        // We can only correct using `far be it for`, otherwise we recommend rephrasing the sentence.
+        let (suggestions, message) = if span.get_content(src).eq_ignore_ascii_case_str("for") {
+            (
+                vec![Suggestion::replace_with_match_case(
+                    vec!['f', 'r', 'o', 'm'],
+                    content,
+                )],
+                "`Far be it for` is a common error for `far be it from`".to_string(),
+            )
+        } else {
+            (vec![], "The correct usage of the idiom is `far be it from` [someone] to [do something]. Try to rephrase the sentence.".to_string())
+        };
+
+        Some(Lint {
+            span,
+            lint_kind: LintKind::Usage,
+            suggestions,
+            message,
+            ..Default::default()
+        })
+    }
+
+    fn description(&self) -> &'static str {
+        "Flags misuse of `far be it` and suggests using `from` when it is followed by `for`"
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::FarBeIt;
+    use crate::linting::tests::{
+        assert_no_lints, assert_suggestion_count, assert_suggestion_result,
+    };
+
+    #[test]
+    fn far_be_it_for_me_capitalized() {
+        assert_suggestion_result(
+            "Far be it for me to suggestion that additional cardinality be added to the already TOO MUCH CARDINALITY metric space.",
+            FarBeIt::default(),
+            "Far be it from me to suggestion that additional cardinality be added to the already TOO MUCH CARDINALITY metric space.",
+        );
+    }
+
+    #[test]
+    fn far_be_it_for_me_lowercase() {
+        assert_suggestion_result(
+            "Far be it for me to tell people what to do so I'm not earnestly proposing to take away the ability to add literals to lazyframes.",
+            FarBeIt::default(),
+            "Far be it from me to tell people what to do so I'm not earnestly proposing to take away the ability to add literals to lazyframes.",
+        );
+    }
+
+    #[test]
+    fn far_be_it_that() {
+        assert_suggestion_count(
+            "Far be it that I get in the middle of this thread (and the complexity WebAuthn has spawned)",
+            FarBeIt::default(),
+            0,
+        );
+    }
+
+    #[test]
+    fn far_be_it_for_the_software() {
+        assert_suggestion_result(
+            "Far be it for the software to give any indication of that fact.",
+            FarBeIt::default(),
+            "Far be it from the software to give any indication of that fact.",
+        );
+    }
+
+    #[test]
+    #[ignore = "No punctuation between '... so far' and 'be it ...'"]
+    fn missing_punctuation_false_positive() {
+        assert_no_lints(
+            "but it is failing for master and all the 11.x branches i have tried so far be it 11.0.0, 11.0.1 ...",
+            FarBeIt::default(),
+        );
+    }
+
+    #[test]
+    fn far_be_it_to() {
+        assert_suggestion_count(
+            "I'm not a marketing guy, so far be it to second guess that.",
+            FarBeIt::default(),
+            0,
+        );
+    }
+}
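
The rule thus has two output shapes: a concrete, case-matched `for` -> `from` replacement, and a suggestion-free rephrase request for any other trailing word. A hedged sketch of observing both, under the same `Document` and re-export assumptions as the earlier sketches:

    use harper_core::Document;
    use harper_core::linting::{FarBeIt, Linter};

    fn main() {
        let mut linter = FarBeIt::default();

        // Followed by `for`: the `from` replacement is offered.
        let doc = Document::new_plain_english_curated("Far be it for me to object.");
        assert!(linter.lint(&doc).iter().all(|l| !l.suggestions.is_empty()));

        // Followed by another word: message only, asking for a rephrase.
        let doc = Document::new_plain_english_curated("Far be it that I should object.");
        assert!(linter.lint(&doc).iter().all(|l| l.suggestions.is_empty()));
    }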

184 harper-core/src/linting/fascinated_by.rs Normal file
@@ -0,0 +1,184 @@
+use crate::{
+    CharStringExt, Lint, Token,
+    expr::{Expr, SequenceExpr},
+    linting::{ExprLinter, LintKind, Suggestion, expr_linter::Chunk},
+};
+
+pub struct FascinatedBy {
+    expr: Box<dyn Expr>,
+}
+
+impl Default for FascinatedBy {
+    fn default() -> Self {
+        Self {
+            expr: Box::new(SequenceExpr::aco("fascinated").t_ws().then_preposition()),
+        }
+    }
+}
+
+impl ExprLinter for FascinatedBy {
+    type Unit = Chunk;
+
+    fn expr(&self) -> &dyn Expr {
+        self.expr.as_ref()
+    }
+
+    fn match_to_lint(&self, toks: &[Token], src: &[char]) -> Option<Lint> {
+        let prep_span = toks.last()?.span;
+        let prep_chars = prep_span.get_content(src);
+        if prep_chars.eq_any_ignore_ascii_case_str(&["by", "with"]) {
+            return None;
+        }
+
+        Some(Lint {
+            span: prep_span,
+            lint_kind: LintKind::Usage,
+            suggestions: vec![
+                Suggestion::replace_with_match_case_str("by", prep_chars),
+                Suggestion::replace_with_match_case_str("with", prep_chars),
+            ],
+            message: "The correct prepositions to use with `fascinated` are `by` or `with`."
+                .to_string(),
+            ..Default::default()
+        })
+    }
+
+    fn description(&self) -> &str {
+        "Ensures the correct prepositions are used with `fascinated` (e.g., `fascinated by` or `fascinated with`)."
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::linting::{fascinated_by::FascinatedBy, tests::assert_good_and_bad_suggestions};
+
+    #[test]
+    fn fix_amiga() {
+        assert_good_and_bad_suggestions(
+            "Now, one aspect of the Amiga that I've always been fascinated about is making my own games for the Amiga.",
+            FascinatedBy::default(),
+            &[
+                "Now, one aspect of the Amiga that I've always been fascinated by is making my own games for the Amiga.",
+                "Now, one aspect of the Amiga that I've always been fascinated with is making my own games for the Amiga.",
+            ][..],
+            &[],
+        );
+    }
+
+    #[test]
+    fn fix_microbit() {
+        assert_good_and_bad_suggestions(
+            "also why I am very fascinated about the micro:bit itself",
+            FascinatedBy::default(),
+            &[
+                "also why I am very fascinated by the micro:bit itself",
+                "also why I am very fascinated with the micro:bit itself",
+            ][..],
+            &[],
+        );
+    }
+
+    #[test]
+    fn fix_software_development() {
+        assert_good_and_bad_suggestions(
+            "Self-learner, fascinated about software development, especially computer graphics and web - marcus-phi.",
+            FascinatedBy::default(),
+            &[
+                "Self-learner, fascinated by software development, especially computer graphics and web - marcus-phi.",
+                "Self-learner, fascinated with software development, especially computer graphics and web - marcus-phi.",
+            ][..],
+            &[],
+        );
+    }
+
+    #[test]
+    fn fix_computer_science() {
+        assert_good_and_bad_suggestions(
+            "Fascinated about Computer Science, Finance and Statistics.",
+            FascinatedBy::default(),
+            &[
+                "Fascinated by Computer Science, Finance and Statistics.",
+                "Fascinated with Computer Science, Finance and Statistics.",
+            ][..],
+            &[],
+        );
+    }
+
+    #[test]
+    fn fix_possibilities() {
+        assert_good_and_bad_suggestions(
+            "m relatively new to deCONZ and Conbee2 but already very fascinated about the possibilities compared to Philips and Ikea's",
+            FascinatedBy::default(),
+            &[
+                "m relatively new to deCONZ and Conbee2 but already very fascinated by the possibilities compared to Philips and Ikea's",
+                "m relatively new to deCONZ and Conbee2 but already very fascinated with the possibilities compared to Philips and Ikea's",
+            ][..],
+            &[],
+        );
+    }
+
+    #[test]
+    fn fix_project() {
+        assert_good_and_bad_suggestions(
+            "I have been using browser use in local mode for a while and i am pretty fascinated about the project.",
+            FascinatedBy::default(),
+            &[
+                "I have been using browser use in local mode for a while and i am pretty fascinated by the project.",
+                "I have been using browser use in local mode for a while and i am pretty fascinated with the project.",
+            ][..],
+            &[],
+        );
+    }
+
+    #[test]
+    fn fix_work() {
+        assert_good_and_bad_suggestions(
+            "Hey guys, I am really fascinated about your work and I tried to build Magisk so I will be able to contribute for the project.",
+            FascinatedBy::default(),
+            &[
+                "Hey guys, I am really fascinated by your work and I tried to build Magisk so I will be able to contribute for the project.",
+                "Hey guys, I am really fascinated with your work and I tried to build Magisk so I will be able to contribute for the project.",
+            ][..],
+            &[],
+        );
+    }
+
+    #[test]
+    fn fix_ais() {
+        assert_good_and_bad_suggestions(
+            "I am a retired Dutch telecom engineer and fascinated about AIS applications.",
+            FascinatedBy::default(),
+            &[
+                "I am a retired Dutch telecom engineer and fascinated by AIS applications.",
+                "I am a retired Dutch telecom engineer and fascinated with AIS applications.",
+            ][..],
+            &[],
+        );
+    }
+
+    #[test]
+    fn fix_innovative_ideas() {
+        assert_good_and_bad_suggestions(
+            "Software Developer fascinated about innovative ideas, love to learn and share new technologies and ideas.",
+            FascinatedBy::default(),
+            &[
+                "Software Developer fascinated by innovative ideas, love to learn and share new technologies and ideas.",
+                "Software Developer fascinated with innovative ideas, love to learn and share new technologies and ideas.",
+            ][..],
+            &[],
+        );
+    }
+
+    #[test]
+    fn fix_coding() {
+        assert_good_and_bad_suggestions(
+            "m fascinated about coding and and sharing my code to the world.",
+            FascinatedBy::default(),
+            &[
+                "m fascinated by coding and and sharing my code to the world.",
+                "m fascinated with coding and and sharing my code to the world.",
+            ][..],
+            &[],
+        );
+    }
+}
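
Because the expression matches `fascinated` plus any preposition and `match_to_lint` bails out early on `by` and `with`, every lint this rule emits carries exactly two candidate replacements, `by` first. A hedged sketch under the same `Document` and re-export assumptions as the sketches above:

    use harper_core::Document;
    use harper_core::linting::{FascinatedBy, Linter};

    fn main() {
        let mut linter = FascinatedBy::default();
        let doc = Document::new_plain_english_curated("I am fascinated about compilers.");

        let lints = linter.lint(&doc);
        assert_eq!(lints.len(), 1); // one lint, on `about`
        assert_eq!(lints[0].suggestions.len(), 2); // `by`, then `with`
    }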

@@ -1,6 +1,7 @@
 use crate::Token;
 use crate::char_string::CharStringExt;
 use crate::expr::{Expr, SequenceExpr};
+use crate::linting::expr_linter::Chunk;
 use crate::linting::expr_linter::find_the_only_token_matching;
 use crate::linting::{ExprLinter, Lint, LintKind, Suggestion};
 

@@ -42,6 +43,8 @@ impl Default for FeelFell {
 }
 
 impl ExprLinter for FeelFell {
+    type Unit = Chunk;
+
     fn expr(&self) -> &dyn Expr {
         self.expr.as_ref()
     }

Some files were not shown because too many files have changed in this diff.