mirror of
https://github.com/Automattic/harper.git
synced 2025-12-23 08:48:15 +00:00
feat: adding prefixes to dictionary (#2212)
* feat: adding prefixes to dictionary
* feat: `AffixData` for `DictWordMetadata`
This commit is contained in:
parent
6ac8406e29
commit
f15778ed28
3 changed files with 112 additions and 51 deletions
|
|
@ -997,6 +997,14 @@
|
|||
"metadata": {
|
||||
"//": "not yet implemented"
|
||||
}
|
||||
},
|
||||
"(": {
|
||||
"#": "prefix property",
|
||||
"metadata": {
|
||||
"affix": {
|
||||
"is_prefix": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11157,7 +11157,7 @@ Zworykin/g
|
|||
Zyrtec/g
|
||||
Zyuganov/g
|
||||
Zzz
|
||||
a/~DP
|
||||
a/~DP(
|
||||
a.m./
|
||||
aah/NV
|
||||
aardvark/~NSg
|
||||
|
|
@ -12585,7 +12585,7 @@ antagonist/~NSg
|
|||
antagonistic/~JQ
|
||||
antagonize/VdSG
|
||||
antarctic/~J
|
||||
ante/~NSgV
|
||||
ante/~NSgV(
|
||||
anteater/NgS
|
||||
antebellum/~J
|
||||
antecedence/Nmg
|
||||
|
|
@ -12620,7 +12620,7 @@ anthropomorphise/V!_
|
|||
anthropomorphism/Nmg
|
||||
anthropomorphize/V
|
||||
anthropomorphous/J
|
||||
anti/~JNSgP
|
||||
anti/~JNSgP(
|
||||
antiabortion/J
|
||||
antiabortionist/NgS
|
||||
antiaircraft/JN
|
||||
|
|
@ -12949,7 +12949,7 @@ arbutus/NgS
|
|||
arc/~NSgVdG
|
||||
arcade/~NgSV
|
||||
arcane/~J
|
||||
arch/~NgSVGd>J^YpZv
|
||||
arch/~NgSVGd>J^YpZv(
|
||||
archaeological/~JY
|
||||
archaeologist/~NSg
|
||||
archaeology/~Nmg
|
||||
|
|
@ -13508,7 +13508,7 @@ authorized/~JVtT
|
|||
authorship/~Nmg
|
||||
autism/~Nmg
|
||||
autistic/~JN
|
||||
auto/~JNgSV
|
||||
auto/~JNgSV(
|
||||
autobahn/~NSg
|
||||
autobiographer/NSg
|
||||
autobiographic/J
|
||||
|
|
@ -14436,6 +14436,7 @@ bend/~VbG>SNgBZ
|
|||
bendability/Nmg
|
||||
bender/~Ng
|
||||
bendy/J^>N
|
||||
# bene # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
beneath/~P
|
||||
benedictine/~
|
||||
benediction/NwSg
|
||||
|
|
@ -14572,7 +14573,7 @@ bezel/NgS
|
|||
bezier/NgS
|
||||
bf/~N
|
||||
bhaji/N
|
||||
bi/~J>NSgZ
|
||||
bi/~J>NSgZ(
|
||||
biannual/JYN
|
||||
bias/~NgSVGdJ
|
||||
biased/~JVtTU
|
||||
|
|
@ -16892,6 +16893,7 @@ center/~NgJVdG
|
|||
centerboard/NSg
|
||||
centerfold/NgS
|
||||
centerpiece/~NgS
|
||||
# centi # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
centigrade/JN
|
||||
centigram/NSg
|
||||
centiliter/NgS<
|
||||
|
|
@ -17554,6 +17556,7 @@ circularize/VdSG
|
|||
circulate/~VdSGr
|
||||
circulation/~NSgr
|
||||
circulatory/JN
|
||||
# circum # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
circumcise/VdSGXn
|
||||
circumcised/JVtTNU
|
||||
circumcision/~Ng
|
||||
|
|
@ -17913,7 +17916,7 @@ clxvi
|
|||
clxvii
|
||||
cm/~
|
||||
cnidarian/NgS
|
||||
co/~NSIdE
|
||||
co/~NSIdE(
|
||||
coach/~NgSVdG
|
||||
coachload/NS
|
||||
coachman/N0g
|
||||
|
|
@ -20368,6 +20371,7 @@ dc/~N
|
|||
dd/~NSdG
|
||||
dded/K
|
||||
dding/K
|
||||
# de # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
deacon/~NgSV
|
||||
deaconess/NgS
|
||||
dead/~J^Y>NgVXn
|
||||
|
|
@ -20439,6 +20443,7 @@ debtor/~NgS
|
|||
debugger/NSg
|
||||
debut/~NgVGd
|
||||
debutante/NSg
|
||||
# deca # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
decade/~NgS
|
||||
decadence/~Nmg
|
||||
decadency/Nmg
|
||||
|
|
@ -21117,6 +21122,7 @@ dextrose/Nmg
|
|||
dharma/~NwgS
|
||||
dhoti/NSg
|
||||
dhow/NgS
|
||||
# di # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
diabetes/~Nmg
|
||||
diabetic/~JNSg
|
||||
diabolic/J
|
||||
|
|
@ -21406,7 +21412,7 @@ dirtball/NS
|
|||
dirtily/Ry
|
||||
dirtiness/Nmg
|
||||
dirty/~J^>VdSGp
|
||||
dis/~VNgI
|
||||
dis/~VNgI(
|
||||
disable/~VdSGJL
|
||||
disablement/Ng
|
||||
disambiguate/~VSdGn
|
||||
|
|
@ -22399,6 +22405,7 @@ dynamo/~NSg
|
|||
dynastic/~J
|
||||
dynasty/~NSg
|
||||
dyno/NSg
|
||||
# dys # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
dysentery/~Nmg
|
||||
dysfunction/~Nmg
|
||||
dysfunctional/~J
|
||||
|
|
@ -22989,7 +22996,7 @@ emulsification/Nmg
|
|||
emulsifier/~NgS
|
||||
emulsify/Vd>SGnZ
|
||||
emulsion/~NwgSV
|
||||
en/~NSgPI
|
||||
en/~NSgPI(
|
||||
enable/~Vd>SGZ
|
||||
enabler/NgS
|
||||
enact/~VSdGrL
|
||||
|
|
@ -23575,6 +23582,7 @@ etude/NSg
|
|||
etymological/~JY
|
||||
etymologist/NSg
|
||||
etymology/~NwSg
|
||||
# eu # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
eucalypti/N9
|
||||
eucalyptus/~N0gS
|
||||
euchre/NSgVdG
|
||||
|
|
@ -23682,7 +23690,7 @@ evolutionist/NSg
|
|||
evolve/~VdSG
|
||||
ewe/~NSg>Z
|
||||
ewer/Ng
|
||||
ex/~NgSVJ
|
||||
ex/~NgSVJ(
|
||||
exabyte/NgS
|
||||
exacerbate/VGdSn
|
||||
exacerbation/Nwg
|
||||
|
|
@ -24032,7 +24040,7 @@ extortion/~Ng>Z
|
|||
extortionate/JY
|
||||
extortioner/Ng
|
||||
extortionist/NgS
|
||||
extra/~JNSg
|
||||
extra/~JNSg(
|
||||
extracellular/~J
|
||||
extract/~NgSVdGv
|
||||
extraction/~NwSg
|
||||
|
|
@ -25314,7 +25322,7 @@ forceps/N09g
|
|||
forcible/~J
|
||||
forcibly/~Ry
|
||||
ford/~NgSVdGB
|
||||
fore/~JNgS
|
||||
fore/~JNgS(
|
||||
forearm/~NSgVGd
|
||||
forebear/NgSV
|
||||
forebode/VGdSNz
|
||||
|
|
@ -28126,7 +28134,7 @@ hesitate/~VdSGnX
|
|||
hesitating/VNYU
|
||||
hesitation/~Ng
|
||||
hessian/~N
|
||||
hetero/~JNSg
|
||||
hetero/~JNSg(
|
||||
heterodox/J
|
||||
heterodoxy/Nmg
|
||||
heterogeneity/Ng
|
||||
|
|
@ -28427,7 +28435,7 @@ homily/~NSg
|
|||
hominid/NSgJ
|
||||
hominoid/NS
|
||||
hominy/Ng
|
||||
homo/~NgSJ
|
||||
homo/~NgSJ(
|
||||
homoerotic/J
|
||||
homogeneity/Ng
|
||||
homogeneous/~JY
|
||||
|
|
@ -28917,6 +28925,7 @@ hymn/~NgSVdG
|
|||
hymnal/~NgSJ
|
||||
hymnbook/NSg
|
||||
hype/~NmgSVGd>J
|
||||
# hyper # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
hyperactive/J
|
||||
hyperactivity/~Ng
|
||||
hyperaggressive/J
|
||||
|
|
@ -29113,6 +29122,7 @@ ignore/~VGdS
|
|||
iguana/~NgS
|
||||
ii/~
|
||||
iii/~
|
||||
# il # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
ilea/N
|
||||
ileitis/Ng
|
||||
ileum/Ng
|
||||
|
|
@ -29153,6 +29163,7 @@ illustrative/~JY
|
|||
illustrator/~NSg
|
||||
illustrious/~JYp
|
||||
illustriousness/Nmg
|
||||
# im # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
image/~NwSgVdG
|
||||
imager/NgS
|
||||
imagery/~Nmg
|
||||
|
|
@ -29464,7 +29475,7 @@ impure/~JY^>V
|
|||
impurity/~NSg
|
||||
imputation/NSg
|
||||
impute/VdSGB
|
||||
in/~PJRrg # removed `4`, verb senses are obsolete, `NS`, noun sense is marginal
|
||||
in/~PJRrg( # removed `4`, verb senses are obsolete, `NS`, noun sense is marginal
|
||||
inaccuracy/NwgS
|
||||
inaction/~Nmg
|
||||
inadequacy/NS
|
||||
|
|
@ -30114,7 +30125,7 @@ intent/~NSgJYp
|
|||
intention/~NgSV
|
||||
intentional/~JYNU
|
||||
intentness/Ng
|
||||
inter/~VSEL
|
||||
inter/~VSEL(
|
||||
interact/~VGdSNv
|
||||
interaction/~NwSg
|
||||
interactive/~JYN
|
||||
|
|
@ -30298,6 +30309,7 @@ intonation/~NSg
|
|||
intoxicant/NSgJ
|
||||
intoxicate/VdSGJn
|
||||
intoxication/~Ng
|
||||
# intra # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
intracranial/~J
|
||||
intramural/~JN
|
||||
intramuscular/J
|
||||
|
|
@ -30316,7 +30328,7 @@ intriguer/Ng
|
|||
intriguing/~JYV6N
|
||||
intrinsic/~JNgS
|
||||
intrinsically/~Ry
|
||||
intro/~NSgV
|
||||
intro/~NSgV(
|
||||
introduce/~VGdSr
|
||||
introduction/~N0gr
|
||||
introductions/~N9
|
||||
|
|
@ -31100,7 +31112,7 @@ killer/~NgJ
|
|||
killing/~JNgV
|
||||
killjoy/NSg
|
||||
kiln/~NgSVdG
|
||||
kilo/~NgS
|
||||
kilo/~NgS(
|
||||
kilobit/NSg
|
||||
kilobyte/NSg
|
||||
kilocoulomb/S
|
||||
|
|
@ -32654,7 +32666,7 @@ mackerel/~NwSg
|
|||
mackinaw/NSg
|
||||
mackintosh/~NgS
|
||||
macrame/NgV
|
||||
macro/~JNSg
|
||||
macro/~JNSg(
|
||||
macroaggregate/Ng
|
||||
macrobiotic/JS
|
||||
macrobiotics/Nwg
|
||||
|
|
@ -33383,7 +33395,7 @@ meeting/~NwgSV
|
|||
meetinghouse/NSg
|
||||
meetup/NgS
|
||||
meg/~NSV
|
||||
mega/~JN
|
||||
mega/~JN(
|
||||
megabit/NSg
|
||||
megabucks/Ng
|
||||
megabyte/NgS
|
||||
|
|
@ -33700,7 +33712,7 @@ mica/~Ng
|
|||
mice/~N9V
|
||||
mick/~NSJ
|
||||
mickey/~NgSV
|
||||
micro/~JNSgV
|
||||
micro/~JNSgV(
|
||||
microaggression/NSg
|
||||
microarchitecture/NgS
|
||||
microbe/NgS
|
||||
|
|
@ -33762,7 +33774,7 @@ microtransaction/NSg
|
|||
microvascular/J
|
||||
microwave/~NSgVdGB
|
||||
microwaveable/J
|
||||
mid/~JPN
|
||||
mid/~JPN(
|
||||
midair/J
|
||||
midcentury/J
|
||||
midday/~Ng
|
||||
|
|
@ -33864,6 +33876,7 @@ millennial/JNgS
|
|||
millennium/~NgS
|
||||
miller/~Ng
|
||||
millet/~Ng
|
||||
# milli # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
milliamp/NgS
|
||||
milliard/Sg
|
||||
millibar/NgS
|
||||
|
|
@ -33929,7 +33942,7 @@ minestrone/Nmg
|
|||
minesweeper/NSg
|
||||
mingle/VdGSN
|
||||
mingy/J
|
||||
mini/~JNgS
|
||||
mini/~JNgS(
|
||||
miniature/~NgSJV
|
||||
miniaturisation/Ng!_
|
||||
miniaturise/VGdS!_
|
||||
|
|
@ -34001,6 +34014,7 @@ mirthful/JYp
|
|||
mirthfulness/Nmg
|
||||
mirthless/JY
|
||||
miry/J>^
|
||||
# mis # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
misaddress/VdSG
|
||||
misadventure/NwgS
|
||||
misaligned/JV
|
||||
|
|
@ -34400,7 +34414,7 @@ monkey/~NgSVdG
|
|||
monkeyshine/NSg
|
||||
monkish/J
|
||||
monkshood/NSg
|
||||
mono/~NgJ
|
||||
mono/~NgJ(
|
||||
monochromatic/~J
|
||||
monochrome/~NgSJ
|
||||
monocle/NSgd
|
||||
|
|
@ -34774,7 +34788,7 @@ mullet/~NgS
|
|||
mulligan/~NSg
|
||||
mulligatawny/Ng
|
||||
mullion/NSgVd
|
||||
multi/~N
|
||||
multi/~N(
|
||||
multibillion/J
|
||||
multibyte/J
|
||||
multicellular/J
|
||||
|
|
@ -35273,6 +35287,7 @@ nelson/~NSg
|
|||
nematode/NSg
|
||||
nemeses/N9
|
||||
nemesis/~N0g
|
||||
# neo # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
neoclassic/J
|
||||
neoclassical/~JN
|
||||
neoclassicism/Nmg
|
||||
|
|
@ -35594,7 +35609,7 @@ nomination's/r
|
|||
nominative/~JNSg
|
||||
nominator/~NSge
|
||||
nominee/~NgS
|
||||
non/~N
|
||||
non/~N(
|
||||
nonabrasive/JN
|
||||
nonabsorbent/JSg
|
||||
nonacademic/JN
|
||||
|
|
@ -36462,6 +36477,7 @@ omission/~NwgS
|
|||
omit/~VS
|
||||
omitted/~V
|
||||
omitting/~VN
|
||||
# omni # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
omnibus/~NgSJV
|
||||
omnidirectional/J
|
||||
omnipotence/Nmg
|
||||
|
|
@ -36779,7 +36795,7 @@ ourself/Ia1F # I:pronoun a:personal 1:person .~singular F:reflexive (of t
|
|||
ourselves/~Ia1F: # I:pronoun a:personal 1:person :~plural F:reflexive
|
||||
oust/~VGd>SZ
|
||||
ouster/~NgSV
|
||||
out/~PNSgVGd>JRz
|
||||
out/~PNSgVGd>JRz(
|
||||
outage/NSg
|
||||
outargue/VGdS
|
||||
outback/~NgSJV
|
||||
|
|
@ -36942,7 +36958,7 @@ oven/~NgSV
|
|||
ovenbird/NSg
|
||||
ovenproof/J
|
||||
ovenware/Nmg
|
||||
over/~JYNgSP
|
||||
over/~JYNgSP(
|
||||
overabundance/Ng
|
||||
overabundant/J
|
||||
overachieve/VGd>SZ
|
||||
|
|
@ -37439,7 +37455,7 @@ pampas/Ng
|
|||
pamper/VdGSN
|
||||
pamphlet/~NgSV
|
||||
pamphleteer/NgSV
|
||||
pan/~NSgVJ
|
||||
pan/~NSgVJ(
|
||||
panacea/NSg
|
||||
panache/Ng
|
||||
panama/~NgS
|
||||
|
|
@ -37528,7 +37544,7 @@ paprika/~NmgJ
|
|||
papyri/~N9
|
||||
papyrus/~N0g
|
||||
par/~NSgJ>PVGdZBz
|
||||
para/~NgSJ
|
||||
para/~NgSJ(
|
||||
parable/~NgSVJ
|
||||
parabola/N0Sg
|
||||
parabolæ/N9
|
||||
|
|
@ -38137,6 +38153,7 @@ peppy/J^>Np
|
|||
pepsin/Ng
|
||||
peptic/JNgS
|
||||
peptide/~NS
|
||||
# per # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
peradventure/Ng
|
||||
perambulate/VGdSXn
|
||||
perambulation/Nwg
|
||||
|
|
@ -39241,7 +39258,7 @@ polonaise/NSgV
|
|||
polonium/Nmg
|
||||
poltergeist/~NgS
|
||||
poltroon/NSgJ
|
||||
poly/~NJV
|
||||
poly/~NJV(
|
||||
polyacrylamide/N
|
||||
polyamory/NS
|
||||
polyandrous/J
|
||||
|
|
@ -39448,7 +39465,7 @@ possibility/~NSg
|
|||
possible/~JNSg
|
||||
possibly/~R # adverb of probability/certainty/affirmation; modal adverb
|
||||
possum/~NSgV
|
||||
post/~NwgSVGd>PZz
|
||||
post/~NwgSVGd>PZz(
|
||||
postage/~Nmg
|
||||
postal/~J
|
||||
postbag/NgS
|
||||
|
|
@ -40034,7 +40051,7 @@ prizefighter/Ng
|
|||
prizefighting/Ng
|
||||
prizewinner/NgS
|
||||
prizewinning/J
|
||||
pro/~NSgPJ
|
||||
pro/~NSgPJ(
|
||||
probabilistic/~J
|
||||
probability/~NSg
|
||||
probable/~JNSg
|
||||
|
|
@ -40316,6 +40333,7 @@ protein/~NwSg
|
|||
protest/NwgS
|
||||
protestant/~JNgS
|
||||
protestation/NwgS
|
||||
# proto # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
protocol/~NwgSV
|
||||
proton/~NSg
|
||||
protoplasm/Nmg
|
||||
|
|
@ -40389,7 +40407,7 @@ psaltery/NSg
|
|||
psephologist/NS
|
||||
psephology/N
|
||||
pseud/NS
|
||||
pseudo/~NSJ
|
||||
pseudo/~NSJ(
|
||||
pseudocode/NmgG
|
||||
pseudonym/~NSg
|
||||
pseudonymous/~J
|
||||
|
|
@ -41244,7 +41262,7 @@ razz/NgSVGd
|
|||
razzmatazz/Ng
|
||||
rcpt/N
|
||||
rd/~N
|
||||
re/PNSgvz
|
||||
re/PNSgvz(
|
||||
reach/~VdGSNgB
|
||||
reachable/~JNU
|
||||
reacquire/VdSG
|
||||
|
|
@ -42130,7 +42148,7 @@ retributive/J
|
|||
retrieval/~NSg
|
||||
retrieve/~Vd>GSNgZB
|
||||
retriever/Ng
|
||||
retro/~JNmgS
|
||||
retro/~JNmgS(
|
||||
retroactive/~JY
|
||||
retrofire/NSVGdJ
|
||||
retrofit/~VSNg
|
||||
|
|
@ -43736,7 +43754,7 @@ semaphore/NSgVdG
|
|||
semblance/NSgr
|
||||
semen/~Nmg
|
||||
semester/~NSg
|
||||
semi/~NgS
|
||||
semi/~NgS(
|
||||
semiannual/JYN
|
||||
semiarid/J
|
||||
semiautomatic/JNgSQ
|
||||
|
|
@ -46775,7 +46793,7 @@ suasion/NgE
|
|||
suave/J>Y^Np
|
||||
suaveness/Ng
|
||||
suavity/Ng
|
||||
sub/~NSgVP
|
||||
sub/~NSgVP(
|
||||
subaltern/JNgS
|
||||
subaqua/J
|
||||
subarctic/~ONJ
|
||||
|
|
@ -47144,7 +47162,7 @@ suntanning/V6
|
|||
suntrap/NS
|
||||
sunup/Ng
|
||||
sup/~V>SNgJZ
|
||||
super/~JNgV
|
||||
super/~JNgV(
|
||||
superabundance/NwgS
|
||||
superabundant/J
|
||||
superannuate/VGdSn
|
||||
|
|
@ -47283,6 +47301,7 @@ supremacy/~Ng
|
|||
supreme/~JYVN
|
||||
supremo/NS
|
||||
supt/V
|
||||
# sur # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
surcease/NSgVdG
|
||||
surcharge/NSgVdG
|
||||
surcingle/NSgV
|
||||
|
|
@ -48004,6 +48023,7 @@ teetotalism/Ng
|
|||
teetotaller/NgS!@_
|
||||
tektite/NSg
|
||||
tel/~N
|
||||
# tele # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
telecast/~VG>SNgZ
|
||||
telecaster/Ng
|
||||
telecom/NgS
|
||||
|
|
@ -49152,7 +49172,7 @@ tranquilizer/Ng
|
|||
tranquillise/Vd>SGZ!_
|
||||
tranquilliser/Ng!_
|
||||
tranquillity/Ng!_
|
||||
trans/~JNVi
|
||||
trans/~JNVi(
|
||||
transact/VdGS
|
||||
transaction/~NSg
|
||||
transactional/J
|
||||
|
|
@ -49372,6 +49392,7 @@ tress/NgSVE
|
|||
trestle/~NgS
|
||||
trews/N
|
||||
trey/~NgS
|
||||
# tri # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
triad/~NSg
|
||||
triage/NmgVd
|
||||
triager/NSg
|
||||
|
|
@ -49900,7 +49921,7 @@ ulterior/J
|
|||
ultimate/~JYNgV
|
||||
ultimatum/~NgS
|
||||
ultimo/~JN
|
||||
ultra/~JNSg
|
||||
ultra/~JNSg(
|
||||
ultraconservative/JNSg
|
||||
ultrahigh/J
|
||||
ultraist/NSg
|
||||
|
|
@ -49929,6 +49950,7 @@ umlaut/NgSV
|
|||
ump/NSgVGd
|
||||
umpire/~NgSVGd
|
||||
umpteen/H
|
||||
# un # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
unabridged/~JNgS
|
||||
unacceptability/Nmg
|
||||
unacceptable/~JN
|
||||
|
|
@ -50026,7 +50048,7 @@ undecided/~JNSgV
|
|||
undefine/VGdS
|
||||
undemonstrative/JY
|
||||
undeniably/Ry
|
||||
under/~PJN
|
||||
under/~PJN(
|
||||
underachieve/VGd>SLZ
|
||||
underachiever/Ng
|
||||
underact/VSdG
|
||||
|
|
@ -50259,6 +50281,7 @@ unhealthy/~J^
|
|||
unhistorical/J
|
||||
unholy/~J^
|
||||
unhurt/J
|
||||
# uni # prefixes that are not also words in their own right don't belong in the dictionary
|
||||
unibody/NSg
|
||||
unicameral/~J
|
||||
unicellular/JN
|
||||
|
|
@ -51037,7 +51060,7 @@ vicar/~NSg
|
|||
vicarage/~NSg
|
||||
vicarious/JYp
|
||||
vicariousness/Ng
|
||||
vice/~NgSVJPe
|
||||
vice/~NgSVJPe(
|
||||
viced/JVtT
|
||||
vicegerent/NSgJ
|
||||
vicennial/JN
|
||||
|
|
@ -53414,7 +53437,7 @@ pentest/VSdG
|
|||
pentester/NSg # penetration tester
|
||||
pentesting/NmgV6
|
||||
postfix/NgSVdG
|
||||
pre/~PNV # !! please check and comment !! dictionaries only list prefix pre-
|
||||
pre/~PNV( # !! please check and comment !! dictionaries only list prefix pre-
|
||||
preshared/J
|
||||
quadtree/NgS # data structure
|
||||
quicksort/NgSVdG # algo
|
||||
|
|
|
|||
|
|
@ -18,12 +18,20 @@ use crate::{Document, TokenKind, TokenStringExt};
|
|||
/// having their own lexeme, but "Ivy" and "ivy" sharing the same lexeme.
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)]
|
||||
pub struct DictWordMetadata {
|
||||
/// The main parts of speech which have extra data.
|
||||
pub noun: Option<NounData>,
|
||||
pub pronoun: Option<PronounData>,
|
||||
pub verb: Option<VerbData>,
|
||||
pub adjective: Option<AdjectiveData>,
|
||||
pub adverb: Option<AdverbData>,
|
||||
pub conjunction: Option<ConjunctionData>,
|
||||
pub determiner: Option<DeterminerData>,
|
||||
pub affix: Option<AffixData>,
|
||||
/// Parts of speech which don't have extra data.
|
||||
/// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
|
||||
#[serde(default = "default_false")]
|
||||
pub preposition: bool,
|
||||
/// Whether the word is an offensive word.
|
||||
pub swear: Option<bool>,
|
||||
/// The dialects this word belongs to.
|
||||
/// If no dialects are defined, it can be assumed that the word is
|
||||
|
|
@ -33,11 +41,6 @@ pub struct DictWordMetadata {
|
|||
/// Orthographic information: letter case, spaces, hyphens, etc.
|
||||
#[serde(default = "OrthFlags::empty")]
|
||||
pub orth_info: OrthFlags,
|
||||
/// Whether the word is a [determiner](https://en.wikipedia.org/wiki/English_determiners).
|
||||
pub determiner: Option<DeterminerData>,
|
||||
/// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
|
||||
#[serde(default = "default_false")]
|
||||
pub preposition: bool,
|
||||
/// Whether the word is considered especially common.
|
||||
#[serde(default = "default_false")]
|
||||
pub common: bool,
|
||||
|
|
@ -189,11 +192,12 @@ impl DictWordMetadata {
|
|||
adjective: merge!(self.adjective, other.adjective),
|
||||
adverb: merge!(self.adverb, other.adverb),
|
||||
conjunction: merge!(self.conjunction, other.conjunction),
|
||||
determiner: merge!(self.determiner, other.determiner),
|
||||
affix: merge!(self.affix, other.affix),
|
||||
preposition: self.preposition || other.preposition,
|
||||
dialects: self.dialects | other.dialects,
|
||||
orth_info: self.orth_info | other.orth_info,
|
||||
swear: self.swear.or(other.swear),
|
||||
determiner: merge!(self.determiner, other.determiner),
|
||||
preposition: self.preposition || other.preposition,
|
||||
common: self.common || other.common,
|
||||
derived_from: self.derived_from.or(other.derived_from),
|
||||
pos_tag: self.pos_tag.or(other.pos_tag),
|
||||
|
|
@ -234,6 +238,7 @@ impl DictWordMetadata {
|
|||
self.adverb = None;
|
||||
self.conjunction = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
}
|
||||
PROPN => {
|
||||
|
|
@ -259,6 +264,7 @@ impl DictWordMetadata {
|
|||
self.adverb = None;
|
||||
self.conjunction = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
}
|
||||
PRON => {
|
||||
|
|
@ -272,6 +278,7 @@ impl DictWordMetadata {
|
|||
self.adverb = None;
|
||||
self.conjunction = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
}
|
||||
VERB => {
|
||||
|
|
@ -293,6 +300,7 @@ impl DictWordMetadata {
|
|||
self.adverb = None;
|
||||
self.conjunction = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
}
|
||||
AUX => {
|
||||
|
|
@ -314,6 +322,7 @@ impl DictWordMetadata {
|
|||
self.adverb = None;
|
||||
self.conjunction = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
}
|
||||
ADJ => {
|
||||
|
|
@ -327,6 +336,7 @@ impl DictWordMetadata {
|
|||
self.adverb = None;
|
||||
self.conjunction = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
}
|
||||
ADV => {
|
||||
|
|
@ -340,6 +350,7 @@ impl DictWordMetadata {
|
|||
self.adjective = None;
|
||||
self.conjunction = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
}
|
||||
ADP => {
|
||||
|
|
@ -350,6 +361,7 @@ impl DictWordMetadata {
|
|||
self.adverb = None;
|
||||
self.conjunction = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = true;
|
||||
}
|
||||
DET => {
|
||||
|
|
@ -359,6 +371,7 @@ impl DictWordMetadata {
|
|||
self.adjective = None;
|
||||
self.adverb = None;
|
||||
self.conjunction = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
self.determiner = Some(DeterminerData::default());
|
||||
}
|
||||
|
|
@ -373,6 +386,7 @@ impl DictWordMetadata {
|
|||
self.adjective = None;
|
||||
self.adverb = None;
|
||||
self.determiner = None;
|
||||
self.affix = None;
|
||||
self.preposition = false;
|
||||
}
|
||||
_ => {}
|
||||
|
|
@ -958,6 +972,22 @@ impl ConjunctionData {
|
|||
}
|
||||
}
|
||||
|
||||
/// Affix-related metadata attached to a dictionary word.
///
/// Both fields are optional; the derived `Default` leaves them as `None`,
/// meaning the property has not been recorded for the word.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
|
||||
pub struct AffixData {
|
||||
/// Whether the word is marked as a prefix (`None` when not recorded).
pub is_prefix: Option<bool>,
|
||||
/// Whether the word is marked as a suffix (`None` when not recorded).
pub is_suffix: Option<bool>,
|
||||
}
|
||||
|
||||
impl AffixData {
|
||||
/// Produce a copy of `self` with the known properties of `other` set.
|
||||
pub fn or(&self, _other: &Self) -> Self {
|
||||
Self {
|
||||
is_prefix: self.is_prefix.or(_other.is_prefix),
|
||||
is_suffix: self.is_suffix.or(_other.is_suffix),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A regional dialect.
|
||||
///
|
||||
/// Note: these have bit-shifted values so that they can ergonomically integrate with
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue