mirror of
https://github.com/Automattic/harper.git
synced 2025-12-23 08:48:15 +00:00
feat: adding prefixes to dictionary (#2212)
* feat: adding prefixes to dictionary * feat: `AffixData` for `DictWordMetadata`
This commit is contained in:
parent
6ac8406e29
commit
f15778ed28
3 changed files with 112 additions and 51 deletions
|
|
@ -997,6 +997,14 @@
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"//": "not yet implemented"
|
"//": "not yet implemented"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"(": {
|
||||||
|
"#": "prefix property",
|
||||||
|
"metadata": {
|
||||||
|
"affix": {
|
||||||
|
"is_prefix": true
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -11157,7 +11157,7 @@ Zworykin/g
|
||||||
Zyrtec/g
|
Zyrtec/g
|
||||||
Zyuganov/g
|
Zyuganov/g
|
||||||
Zzz
|
Zzz
|
||||||
a/~DP
|
a/~DP(
|
||||||
a.m./
|
a.m./
|
||||||
aah/NV
|
aah/NV
|
||||||
aardvark/~NSg
|
aardvark/~NSg
|
||||||
|
|
@ -12585,7 +12585,7 @@ antagonist/~NSg
|
||||||
antagonistic/~JQ
|
antagonistic/~JQ
|
||||||
antagonize/VdSG
|
antagonize/VdSG
|
||||||
antarctic/~J
|
antarctic/~J
|
||||||
ante/~NSgV
|
ante/~NSgV(
|
||||||
anteater/NgS
|
anteater/NgS
|
||||||
antebellum/~J
|
antebellum/~J
|
||||||
antecedence/Nmg
|
antecedence/Nmg
|
||||||
|
|
@ -12620,7 +12620,7 @@ anthropomorphise/V!_
|
||||||
anthropomorphism/Nmg
|
anthropomorphism/Nmg
|
||||||
anthropomorphize/V
|
anthropomorphize/V
|
||||||
anthropomorphous/J
|
anthropomorphous/J
|
||||||
anti/~JNSgP
|
anti/~JNSgP(
|
||||||
antiabortion/J
|
antiabortion/J
|
||||||
antiabortionist/NgS
|
antiabortionist/NgS
|
||||||
antiaircraft/JN
|
antiaircraft/JN
|
||||||
|
|
@ -12949,7 +12949,7 @@ arbutus/NgS
|
||||||
arc/~NSgVdG
|
arc/~NSgVdG
|
||||||
arcade/~NgSV
|
arcade/~NgSV
|
||||||
arcane/~J
|
arcane/~J
|
||||||
arch/~NgSVGd>J^YpZv
|
arch/~NgSVGd>J^YpZv(
|
||||||
archaeological/~JY
|
archaeological/~JY
|
||||||
archaeologist/~NSg
|
archaeologist/~NSg
|
||||||
archaeology/~Nmg
|
archaeology/~Nmg
|
||||||
|
|
@ -13508,7 +13508,7 @@ authorized/~JVtT
|
||||||
authorship/~Nmg
|
authorship/~Nmg
|
||||||
autism/~Nmg
|
autism/~Nmg
|
||||||
autistic/~JN
|
autistic/~JN
|
||||||
auto/~JNgSV
|
auto/~JNgSV(
|
||||||
autobahn/~NSg
|
autobahn/~NSg
|
||||||
autobiographer/NSg
|
autobiographer/NSg
|
||||||
autobiographic/J
|
autobiographic/J
|
||||||
|
|
@ -14436,6 +14436,7 @@ bend/~VbG>SNgBZ
|
||||||
bendability/Nmg
|
bendability/Nmg
|
||||||
bender/~Ng
|
bender/~Ng
|
||||||
bendy/J^>N
|
bendy/J^>N
|
||||||
|
# bene # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
beneath/~P
|
beneath/~P
|
||||||
benedictine/~
|
benedictine/~
|
||||||
benediction/NwSg
|
benediction/NwSg
|
||||||
|
|
@ -14572,7 +14573,7 @@ bezel/NgS
|
||||||
bezier/NgS
|
bezier/NgS
|
||||||
bf/~N
|
bf/~N
|
||||||
bhaji/N
|
bhaji/N
|
||||||
bi/~J>NSgZ
|
bi/~J>NSgZ(
|
||||||
biannual/JYN
|
biannual/JYN
|
||||||
bias/~NgSVGdJ
|
bias/~NgSVGdJ
|
||||||
biased/~JVtTU
|
biased/~JVtTU
|
||||||
|
|
@ -16892,6 +16893,7 @@ center/~NgJVdG
|
||||||
centerboard/NSg
|
centerboard/NSg
|
||||||
centerfold/NgS
|
centerfold/NgS
|
||||||
centerpiece/~NgS
|
centerpiece/~NgS
|
||||||
|
# centi # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
centigrade/JN
|
centigrade/JN
|
||||||
centigram/NSg
|
centigram/NSg
|
||||||
centiliter/NgS<
|
centiliter/NgS<
|
||||||
|
|
@ -17554,6 +17556,7 @@ circularize/VdSG
|
||||||
circulate/~VdSGr
|
circulate/~VdSGr
|
||||||
circulation/~NSgr
|
circulation/~NSgr
|
||||||
circulatory/JN
|
circulatory/JN
|
||||||
|
# circum # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
circumcise/VdSGXn
|
circumcise/VdSGXn
|
||||||
circumcised/JVtTNU
|
circumcised/JVtTNU
|
||||||
circumcision/~Ng
|
circumcision/~Ng
|
||||||
|
|
@ -17913,7 +17916,7 @@ clxvi
|
||||||
clxvii
|
clxvii
|
||||||
cm/~
|
cm/~
|
||||||
cnidarian/NgS
|
cnidarian/NgS
|
||||||
co/~NSIdE
|
co/~NSIdE(
|
||||||
coach/~NgSVdG
|
coach/~NgSVdG
|
||||||
coachload/NS
|
coachload/NS
|
||||||
coachman/N0g
|
coachman/N0g
|
||||||
|
|
@ -20368,6 +20371,7 @@ dc/~N
|
||||||
dd/~NSdG
|
dd/~NSdG
|
||||||
dded/K
|
dded/K
|
||||||
dding/K
|
dding/K
|
||||||
|
# de # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
deacon/~NgSV
|
deacon/~NgSV
|
||||||
deaconess/NgS
|
deaconess/NgS
|
||||||
dead/~J^Y>NgVXn
|
dead/~J^Y>NgVXn
|
||||||
|
|
@ -20439,6 +20443,7 @@ debtor/~NgS
|
||||||
debugger/NSg
|
debugger/NSg
|
||||||
debut/~NgVGd
|
debut/~NgVGd
|
||||||
debutante/NSg
|
debutante/NSg
|
||||||
|
# deca # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
decade/~NgS
|
decade/~NgS
|
||||||
decadence/~Nmg
|
decadence/~Nmg
|
||||||
decadency/Nmg
|
decadency/Nmg
|
||||||
|
|
@ -21117,6 +21122,7 @@ dextrose/Nmg
|
||||||
dharma/~NwgS
|
dharma/~NwgS
|
||||||
dhoti/NSg
|
dhoti/NSg
|
||||||
dhow/NgS
|
dhow/NgS
|
||||||
|
# di # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
diabetes/~Nmg
|
diabetes/~Nmg
|
||||||
diabetic/~JNSg
|
diabetic/~JNSg
|
||||||
diabolic/J
|
diabolic/J
|
||||||
|
|
@ -21406,7 +21412,7 @@ dirtball/NS
|
||||||
dirtily/Ry
|
dirtily/Ry
|
||||||
dirtiness/Nmg
|
dirtiness/Nmg
|
||||||
dirty/~J^>VdSGp
|
dirty/~J^>VdSGp
|
||||||
dis/~VNgI
|
dis/~VNgI(
|
||||||
disable/~VdSGJL
|
disable/~VdSGJL
|
||||||
disablement/Ng
|
disablement/Ng
|
||||||
disambiguate/~VSdGn
|
disambiguate/~VSdGn
|
||||||
|
|
@ -22399,6 +22405,7 @@ dynamo/~NSg
|
||||||
dynastic/~J
|
dynastic/~J
|
||||||
dynasty/~NSg
|
dynasty/~NSg
|
||||||
dyno/NSg
|
dyno/NSg
|
||||||
|
# dys # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
dysentery/~Nmg
|
dysentery/~Nmg
|
||||||
dysfunction/~Nmg
|
dysfunction/~Nmg
|
||||||
dysfunctional/~J
|
dysfunctional/~J
|
||||||
|
|
@ -22989,7 +22996,7 @@ emulsification/Nmg
|
||||||
emulsifier/~NgS
|
emulsifier/~NgS
|
||||||
emulsify/Vd>SGnZ
|
emulsify/Vd>SGnZ
|
||||||
emulsion/~NwgSV
|
emulsion/~NwgSV
|
||||||
en/~NSgPI
|
en/~NSgPI(
|
||||||
enable/~Vd>SGZ
|
enable/~Vd>SGZ
|
||||||
enabler/NgS
|
enabler/NgS
|
||||||
enact/~VSdGrL
|
enact/~VSdGrL
|
||||||
|
|
@ -23575,6 +23582,7 @@ etude/NSg
|
||||||
etymological/~JY
|
etymological/~JY
|
||||||
etymologist/NSg
|
etymologist/NSg
|
||||||
etymology/~NwSg
|
etymology/~NwSg
|
||||||
|
# eu # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
eucalypti/N9
|
eucalypti/N9
|
||||||
eucalyptus/~N0gS
|
eucalyptus/~N0gS
|
||||||
euchre/NSgVdG
|
euchre/NSgVdG
|
||||||
|
|
@ -23682,7 +23690,7 @@ evolutionist/NSg
|
||||||
evolve/~VdSG
|
evolve/~VdSG
|
||||||
ewe/~NSg>Z
|
ewe/~NSg>Z
|
||||||
ewer/Ng
|
ewer/Ng
|
||||||
ex/~NgSVJ
|
ex/~NgSVJ(
|
||||||
exabyte/NgS
|
exabyte/NgS
|
||||||
exacerbate/VGdSn
|
exacerbate/VGdSn
|
||||||
exacerbation/Nwg
|
exacerbation/Nwg
|
||||||
|
|
@ -24032,7 +24040,7 @@ extortion/~Ng>Z
|
||||||
extortionate/JY
|
extortionate/JY
|
||||||
extortioner/Ng
|
extortioner/Ng
|
||||||
extortionist/NgS
|
extortionist/NgS
|
||||||
extra/~JNSg
|
extra/~JNSg(
|
||||||
extracellular/~J
|
extracellular/~J
|
||||||
extract/~NgSVdGv
|
extract/~NgSVdGv
|
||||||
extraction/~NwSg
|
extraction/~NwSg
|
||||||
|
|
@ -25314,7 +25322,7 @@ forceps/N09g
|
||||||
forcible/~J
|
forcible/~J
|
||||||
forcibly/~Ry
|
forcibly/~Ry
|
||||||
ford/~NgSVdGB
|
ford/~NgSVdGB
|
||||||
fore/~JNgS
|
fore/~JNgS(
|
||||||
forearm/~NSgVGd
|
forearm/~NSgVGd
|
||||||
forebear/NgSV
|
forebear/NgSV
|
||||||
forebode/VGdSNz
|
forebode/VGdSNz
|
||||||
|
|
@ -28126,7 +28134,7 @@ hesitate/~VdSGnX
|
||||||
hesitating/VNYU
|
hesitating/VNYU
|
||||||
hesitation/~Ng
|
hesitation/~Ng
|
||||||
hessian/~N
|
hessian/~N
|
||||||
hetero/~JNSg
|
hetero/~JNSg(
|
||||||
heterodox/J
|
heterodox/J
|
||||||
heterodoxy/Nmg
|
heterodoxy/Nmg
|
||||||
heterogeneity/Ng
|
heterogeneity/Ng
|
||||||
|
|
@ -28427,7 +28435,7 @@ homily/~NSg
|
||||||
hominid/NSgJ
|
hominid/NSgJ
|
||||||
hominoid/NS
|
hominoid/NS
|
||||||
hominy/Ng
|
hominy/Ng
|
||||||
homo/~NgSJ
|
homo/~NgSJ(
|
||||||
homoerotic/J
|
homoerotic/J
|
||||||
homogeneity/Ng
|
homogeneity/Ng
|
||||||
homogeneous/~JY
|
homogeneous/~JY
|
||||||
|
|
@ -28917,6 +28925,7 @@ hymn/~NgSVdG
|
||||||
hymnal/~NgSJ
|
hymnal/~NgSJ
|
||||||
hymnbook/NSg
|
hymnbook/NSg
|
||||||
hype/~NmgSVGd>J
|
hype/~NmgSVGd>J
|
||||||
|
# hyper # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
hyperactive/J
|
hyperactive/J
|
||||||
hyperactivity/~Ng
|
hyperactivity/~Ng
|
||||||
hyperaggressive/J
|
hyperaggressive/J
|
||||||
|
|
@ -29113,6 +29122,7 @@ ignore/~VGdS
|
||||||
iguana/~NgS
|
iguana/~NgS
|
||||||
ii/~
|
ii/~
|
||||||
iii/~
|
iii/~
|
||||||
|
# il # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
ilea/N
|
ilea/N
|
||||||
ileitis/Ng
|
ileitis/Ng
|
||||||
ileum/Ng
|
ileum/Ng
|
||||||
|
|
@ -29153,6 +29163,7 @@ illustrative/~JY
|
||||||
illustrator/~NSg
|
illustrator/~NSg
|
||||||
illustrious/~JYp
|
illustrious/~JYp
|
||||||
illustriousness/Nmg
|
illustriousness/Nmg
|
||||||
|
# im # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
image/~NwSgVdG
|
image/~NwSgVdG
|
||||||
imager/NgS
|
imager/NgS
|
||||||
imagery/~Nmg
|
imagery/~Nmg
|
||||||
|
|
@ -29464,7 +29475,7 @@ impure/~JY^>V
|
||||||
impurity/~NSg
|
impurity/~NSg
|
||||||
imputation/NSg
|
imputation/NSg
|
||||||
impute/VdSGB
|
impute/VdSGB
|
||||||
in/~PJRrg # removed `4`, verb senses are obsolete, `NS`, noun sense is marginal
|
in/~PJRrg( # removed `4`, verb senses are obsolete, `NS`, noun sense is marginal
|
||||||
inaccuracy/NwgS
|
inaccuracy/NwgS
|
||||||
inaction/~Nmg
|
inaction/~Nmg
|
||||||
inadequacy/NS
|
inadequacy/NS
|
||||||
|
|
@ -30114,7 +30125,7 @@ intent/~NSgJYp
|
||||||
intention/~NgSV
|
intention/~NgSV
|
||||||
intentional/~JYNU
|
intentional/~JYNU
|
||||||
intentness/Ng
|
intentness/Ng
|
||||||
inter/~VSEL
|
inter/~VSEL(
|
||||||
interact/~VGdSNv
|
interact/~VGdSNv
|
||||||
interaction/~NwSg
|
interaction/~NwSg
|
||||||
interactive/~JYN
|
interactive/~JYN
|
||||||
|
|
@ -30298,6 +30309,7 @@ intonation/~NSg
|
||||||
intoxicant/NSgJ
|
intoxicant/NSgJ
|
||||||
intoxicate/VdSGJn
|
intoxicate/VdSGJn
|
||||||
intoxication/~Ng
|
intoxication/~Ng
|
||||||
|
# intra # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
intracranial/~J
|
intracranial/~J
|
||||||
intramural/~JN
|
intramural/~JN
|
||||||
intramuscular/J
|
intramuscular/J
|
||||||
|
|
@ -30316,7 +30328,7 @@ intriguer/Ng
|
||||||
intriguing/~JYV6N
|
intriguing/~JYV6N
|
||||||
intrinsic/~JNgS
|
intrinsic/~JNgS
|
||||||
intrinsically/~Ry
|
intrinsically/~Ry
|
||||||
intro/~NSgV
|
intro/~NSgV(
|
||||||
introduce/~VGdSr
|
introduce/~VGdSr
|
||||||
introduction/~N0gr
|
introduction/~N0gr
|
||||||
introductions/~N9
|
introductions/~N9
|
||||||
|
|
@ -31100,7 +31112,7 @@ killer/~NgJ
|
||||||
killing/~JNgV
|
killing/~JNgV
|
||||||
killjoy/NSg
|
killjoy/NSg
|
||||||
kiln/~NgSVdG
|
kiln/~NgSVdG
|
||||||
kilo/~NgS
|
kilo/~NgS(
|
||||||
kilobit/NSg
|
kilobit/NSg
|
||||||
kilobyte/NSg
|
kilobyte/NSg
|
||||||
kilocoulomb/S
|
kilocoulomb/S
|
||||||
|
|
@ -32654,7 +32666,7 @@ mackerel/~NwSg
|
||||||
mackinaw/NSg
|
mackinaw/NSg
|
||||||
mackintosh/~NgS
|
mackintosh/~NgS
|
||||||
macrame/NgV
|
macrame/NgV
|
||||||
macro/~JNSg
|
macro/~JNSg(
|
||||||
macroaggregate/Ng
|
macroaggregate/Ng
|
||||||
macrobiotic/JS
|
macrobiotic/JS
|
||||||
macrobiotics/Nwg
|
macrobiotics/Nwg
|
||||||
|
|
@ -33383,7 +33395,7 @@ meeting/~NwgSV
|
||||||
meetinghouse/NSg
|
meetinghouse/NSg
|
||||||
meetup/NgS
|
meetup/NgS
|
||||||
meg/~NSV
|
meg/~NSV
|
||||||
mega/~JN
|
mega/~JN(
|
||||||
megabit/NSg
|
megabit/NSg
|
||||||
megabucks/Ng
|
megabucks/Ng
|
||||||
megabyte/NgS
|
megabyte/NgS
|
||||||
|
|
@ -33700,7 +33712,7 @@ mica/~Ng
|
||||||
mice/~N9V
|
mice/~N9V
|
||||||
mick/~NSJ
|
mick/~NSJ
|
||||||
mickey/~NgSV
|
mickey/~NgSV
|
||||||
micro/~JNSgV
|
micro/~JNSgV(
|
||||||
microaggression/NSg
|
microaggression/NSg
|
||||||
microarchitecture/NgS
|
microarchitecture/NgS
|
||||||
microbe/NgS
|
microbe/NgS
|
||||||
|
|
@ -33762,7 +33774,7 @@ microtransaction/NSg
|
||||||
microvascular/J
|
microvascular/J
|
||||||
microwave/~NSgVdGB
|
microwave/~NSgVdGB
|
||||||
microwaveable/J
|
microwaveable/J
|
||||||
mid/~JPN
|
mid/~JPN(
|
||||||
midair/J
|
midair/J
|
||||||
midcentury/J
|
midcentury/J
|
||||||
midday/~Ng
|
midday/~Ng
|
||||||
|
|
@ -33864,6 +33876,7 @@ millennial/JNgS
|
||||||
millennium/~NgS
|
millennium/~NgS
|
||||||
miller/~Ng
|
miller/~Ng
|
||||||
millet/~Ng
|
millet/~Ng
|
||||||
|
# milli # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
milliamp/NgS
|
milliamp/NgS
|
||||||
milliard/Sg
|
milliard/Sg
|
||||||
millibar/NgS
|
millibar/NgS
|
||||||
|
|
@ -33929,7 +33942,7 @@ minestrone/Nmg
|
||||||
minesweeper/NSg
|
minesweeper/NSg
|
||||||
mingle/VdGSN
|
mingle/VdGSN
|
||||||
mingy/J
|
mingy/J
|
||||||
mini/~JNgS
|
mini/~JNgS(
|
||||||
miniature/~NgSJV
|
miniature/~NgSJV
|
||||||
miniaturisation/Ng!_
|
miniaturisation/Ng!_
|
||||||
miniaturise/VGdS!_
|
miniaturise/VGdS!_
|
||||||
|
|
@ -34001,6 +34014,7 @@ mirthful/JYp
|
||||||
mirthfulness/Nmg
|
mirthfulness/Nmg
|
||||||
mirthless/JY
|
mirthless/JY
|
||||||
miry/J>^
|
miry/J>^
|
||||||
|
# mis # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
misaddress/VdSG
|
misaddress/VdSG
|
||||||
misadventure/NwgS
|
misadventure/NwgS
|
||||||
misaligned/JV
|
misaligned/JV
|
||||||
|
|
@ -34400,7 +34414,7 @@ monkey/~NgSVdG
|
||||||
monkeyshine/NSg
|
monkeyshine/NSg
|
||||||
monkish/J
|
monkish/J
|
||||||
monkshood/NSg
|
monkshood/NSg
|
||||||
mono/~NgJ
|
mono/~NgJ(
|
||||||
monochromatic/~J
|
monochromatic/~J
|
||||||
monochrome/~NgSJ
|
monochrome/~NgSJ
|
||||||
monocle/NSgd
|
monocle/NSgd
|
||||||
|
|
@ -34774,7 +34788,7 @@ mullet/~NgS
|
||||||
mulligan/~NSg
|
mulligan/~NSg
|
||||||
mulligatawny/Ng
|
mulligatawny/Ng
|
||||||
mullion/NSgVd
|
mullion/NSgVd
|
||||||
multi/~N
|
multi/~N(
|
||||||
multibillion/J
|
multibillion/J
|
||||||
multibyte/J
|
multibyte/J
|
||||||
multicellular/J
|
multicellular/J
|
||||||
|
|
@ -35273,6 +35287,7 @@ nelson/~NSg
|
||||||
nematode/NSg
|
nematode/NSg
|
||||||
nemeses/N9
|
nemeses/N9
|
||||||
nemesis/~N0g
|
nemesis/~N0g
|
||||||
|
# neo # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
neoclassic/J
|
neoclassic/J
|
||||||
neoclassical/~JN
|
neoclassical/~JN
|
||||||
neoclassicism/Nmg
|
neoclassicism/Nmg
|
||||||
|
|
@ -35594,7 +35609,7 @@ nomination's/r
|
||||||
nominative/~JNSg
|
nominative/~JNSg
|
||||||
nominator/~NSge
|
nominator/~NSge
|
||||||
nominee/~NgS
|
nominee/~NgS
|
||||||
non/~N
|
non/~N(
|
||||||
nonabrasive/JN
|
nonabrasive/JN
|
||||||
nonabsorbent/JSg
|
nonabsorbent/JSg
|
||||||
nonacademic/JN
|
nonacademic/JN
|
||||||
|
|
@ -36462,6 +36477,7 @@ omission/~NwgS
|
||||||
omit/~VS
|
omit/~VS
|
||||||
omitted/~V
|
omitted/~V
|
||||||
omitting/~VN
|
omitting/~VN
|
||||||
|
# omni # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
omnibus/~NgSJV
|
omnibus/~NgSJV
|
||||||
omnidirectional/J
|
omnidirectional/J
|
||||||
omnipotence/Nmg
|
omnipotence/Nmg
|
||||||
|
|
@ -36779,7 +36795,7 @@ ourself/Ia1F # I:pronoun a:personal 1:person .~singular F:reflexive (of t
|
||||||
ourselves/~Ia1F: # I:pronoun a:personal 1:person :~plural F:reflexive
|
ourselves/~Ia1F: # I:pronoun a:personal 1:person :~plural F:reflexive
|
||||||
oust/~VGd>SZ
|
oust/~VGd>SZ
|
||||||
ouster/~NgSV
|
ouster/~NgSV
|
||||||
out/~PNSgVGd>JRz
|
out/~PNSgVGd>JRz(
|
||||||
outage/NSg
|
outage/NSg
|
||||||
outargue/VGdS
|
outargue/VGdS
|
||||||
outback/~NgSJV
|
outback/~NgSJV
|
||||||
|
|
@ -36942,7 +36958,7 @@ oven/~NgSV
|
||||||
ovenbird/NSg
|
ovenbird/NSg
|
||||||
ovenproof/J
|
ovenproof/J
|
||||||
ovenware/Nmg
|
ovenware/Nmg
|
||||||
over/~JYNgSP
|
over/~JYNgSP(
|
||||||
overabundance/Ng
|
overabundance/Ng
|
||||||
overabundant/J
|
overabundant/J
|
||||||
overachieve/VGd>SZ
|
overachieve/VGd>SZ
|
||||||
|
|
@ -37439,7 +37455,7 @@ pampas/Ng
|
||||||
pamper/VdGSN
|
pamper/VdGSN
|
||||||
pamphlet/~NgSV
|
pamphlet/~NgSV
|
||||||
pamphleteer/NgSV
|
pamphleteer/NgSV
|
||||||
pan/~NSgVJ
|
pan/~NSgVJ(
|
||||||
panacea/NSg
|
panacea/NSg
|
||||||
panache/Ng
|
panache/Ng
|
||||||
panama/~NgS
|
panama/~NgS
|
||||||
|
|
@ -37528,7 +37544,7 @@ paprika/~NmgJ
|
||||||
papyri/~N9
|
papyri/~N9
|
||||||
papyrus/~N0g
|
papyrus/~N0g
|
||||||
par/~NSgJ>PVGdZBz
|
par/~NSgJ>PVGdZBz
|
||||||
para/~NgSJ
|
para/~NgSJ(
|
||||||
parable/~NgSVJ
|
parable/~NgSVJ
|
||||||
parabola/N0Sg
|
parabola/N0Sg
|
||||||
parabolæ/N9
|
parabolæ/N9
|
||||||
|
|
@ -38137,6 +38153,7 @@ peppy/J^>Np
|
||||||
pepsin/Ng
|
pepsin/Ng
|
||||||
peptic/JNgS
|
peptic/JNgS
|
||||||
peptide/~NS
|
peptide/~NS
|
||||||
|
# per # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
peradventure/Ng
|
peradventure/Ng
|
||||||
perambulate/VGdSXn
|
perambulate/VGdSXn
|
||||||
perambulation/Nwg
|
perambulation/Nwg
|
||||||
|
|
@ -39241,7 +39258,7 @@ polonaise/NSgV
|
||||||
polonium/Nmg
|
polonium/Nmg
|
||||||
poltergeist/~NgS
|
poltergeist/~NgS
|
||||||
poltroon/NSgJ
|
poltroon/NSgJ
|
||||||
poly/~NJV
|
poly/~NJV(
|
||||||
polyacrylamide/N
|
polyacrylamide/N
|
||||||
polyamory/NS
|
polyamory/NS
|
||||||
polyandrous/J
|
polyandrous/J
|
||||||
|
|
@ -39448,7 +39465,7 @@ possibility/~NSg
|
||||||
possible/~JNSg
|
possible/~JNSg
|
||||||
possibly/~R # adverb of probability/certainty/affirmation; modal adverb
|
possibly/~R # adverb of probability/certainty/affirmation; modal adverb
|
||||||
possum/~NSgV
|
possum/~NSgV
|
||||||
post/~NwgSVGd>PZz
|
post/~NwgSVGd>PZz(
|
||||||
postage/~Nmg
|
postage/~Nmg
|
||||||
postal/~J
|
postal/~J
|
||||||
postbag/NgS
|
postbag/NgS
|
||||||
|
|
@ -40034,7 +40051,7 @@ prizefighter/Ng
|
||||||
prizefighting/Ng
|
prizefighting/Ng
|
||||||
prizewinner/NgS
|
prizewinner/NgS
|
||||||
prizewinning/J
|
prizewinning/J
|
||||||
pro/~NSgPJ
|
pro/~NSgPJ(
|
||||||
probabilistic/~J
|
probabilistic/~J
|
||||||
probability/~NSg
|
probability/~NSg
|
||||||
probable/~JNSg
|
probable/~JNSg
|
||||||
|
|
@ -40316,6 +40333,7 @@ protein/~NwSg
|
||||||
protest/NwgS
|
protest/NwgS
|
||||||
protestant/~JNgS
|
protestant/~JNgS
|
||||||
protestation/NwgS
|
protestation/NwgS
|
||||||
|
# proto # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
protocol/~NwgSV
|
protocol/~NwgSV
|
||||||
proton/~NSg
|
proton/~NSg
|
||||||
protoplasm/Nmg
|
protoplasm/Nmg
|
||||||
|
|
@ -40389,7 +40407,7 @@ psaltery/NSg
|
||||||
psephologist/NS
|
psephologist/NS
|
||||||
psephology/N
|
psephology/N
|
||||||
pseud/NS
|
pseud/NS
|
||||||
pseudo/~NSJ
|
pseudo/~NSJ(
|
||||||
pseudocode/NmgG
|
pseudocode/NmgG
|
||||||
pseudonym/~NSg
|
pseudonym/~NSg
|
||||||
pseudonymous/~J
|
pseudonymous/~J
|
||||||
|
|
@ -41244,7 +41262,7 @@ razz/NgSVGd
|
||||||
razzmatazz/Ng
|
razzmatazz/Ng
|
||||||
rcpt/N
|
rcpt/N
|
||||||
rd/~N
|
rd/~N
|
||||||
re/PNSgvz
|
re/PNSgvz(
|
||||||
reach/~VdGSNgB
|
reach/~VdGSNgB
|
||||||
reachable/~JNU
|
reachable/~JNU
|
||||||
reacquire/VdSG
|
reacquire/VdSG
|
||||||
|
|
@ -42130,7 +42148,7 @@ retributive/J
|
||||||
retrieval/~NSg
|
retrieval/~NSg
|
||||||
retrieve/~Vd>GSNgZB
|
retrieve/~Vd>GSNgZB
|
||||||
retriever/Ng
|
retriever/Ng
|
||||||
retro/~JNmgS
|
retro/~JNmgS(
|
||||||
retroactive/~JY
|
retroactive/~JY
|
||||||
retrofire/NSVGdJ
|
retrofire/NSVGdJ
|
||||||
retrofit/~VSNg
|
retrofit/~VSNg
|
||||||
|
|
@ -43736,7 +43754,7 @@ semaphore/NSgVdG
|
||||||
semblance/NSgr
|
semblance/NSgr
|
||||||
semen/~Nmg
|
semen/~Nmg
|
||||||
semester/~NSg
|
semester/~NSg
|
||||||
semi/~NgS
|
semi/~NgS(
|
||||||
semiannual/JYN
|
semiannual/JYN
|
||||||
semiarid/J
|
semiarid/J
|
||||||
semiautomatic/JNgSQ
|
semiautomatic/JNgSQ
|
||||||
|
|
@ -46775,7 +46793,7 @@ suasion/NgE
|
||||||
suave/J>Y^Np
|
suave/J>Y^Np
|
||||||
suaveness/Ng
|
suaveness/Ng
|
||||||
suavity/Ng
|
suavity/Ng
|
||||||
sub/~NSgVP
|
sub/~NSgVP(
|
||||||
subaltern/JNgS
|
subaltern/JNgS
|
||||||
subaqua/J
|
subaqua/J
|
||||||
subarctic/~ONJ
|
subarctic/~ONJ
|
||||||
|
|
@ -47144,7 +47162,7 @@ suntanning/V6
|
||||||
suntrap/NS
|
suntrap/NS
|
||||||
sunup/Ng
|
sunup/Ng
|
||||||
sup/~V>SNgJZ
|
sup/~V>SNgJZ
|
||||||
super/~JNgV
|
super/~JNgV(
|
||||||
superabundance/NwgS
|
superabundance/NwgS
|
||||||
superabundant/J
|
superabundant/J
|
||||||
superannuate/VGdSn
|
superannuate/VGdSn
|
||||||
|
|
@ -47283,6 +47301,7 @@ supremacy/~Ng
|
||||||
supreme/~JYVN
|
supreme/~JYVN
|
||||||
supremo/NS
|
supremo/NS
|
||||||
supt/V
|
supt/V
|
||||||
|
# sur # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
surcease/NSgVdG
|
surcease/NSgVdG
|
||||||
surcharge/NSgVdG
|
surcharge/NSgVdG
|
||||||
surcingle/NSgV
|
surcingle/NSgV
|
||||||
|
|
@ -48004,6 +48023,7 @@ teetotalism/Ng
|
||||||
teetotaller/NgS!@_
|
teetotaller/NgS!@_
|
||||||
tektite/NSg
|
tektite/NSg
|
||||||
tel/~N
|
tel/~N
|
||||||
|
# tele # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
telecast/~VG>SNgZ
|
telecast/~VG>SNgZ
|
||||||
telecaster/Ng
|
telecaster/Ng
|
||||||
telecom/NgS
|
telecom/NgS
|
||||||
|
|
@ -49152,7 +49172,7 @@ tranquilizer/Ng
|
||||||
tranquillise/Vd>SGZ!_
|
tranquillise/Vd>SGZ!_
|
||||||
tranquilliser/Ng!_
|
tranquilliser/Ng!_
|
||||||
tranquillity/Ng!_
|
tranquillity/Ng!_
|
||||||
trans/~JNVi
|
trans/~JNVi(
|
||||||
transact/VdGS
|
transact/VdGS
|
||||||
transaction/~NSg
|
transaction/~NSg
|
||||||
transactional/J
|
transactional/J
|
||||||
|
|
@ -49372,6 +49392,7 @@ tress/NgSVE
|
||||||
trestle/~NgS
|
trestle/~NgS
|
||||||
trews/N
|
trews/N
|
||||||
trey/~NgS
|
trey/~NgS
|
||||||
|
# tri # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
triad/~NSg
|
triad/~NSg
|
||||||
triage/NmgVd
|
triage/NmgVd
|
||||||
triager/NSg
|
triager/NSg
|
||||||
|
|
@ -49900,7 +49921,7 @@ ulterior/J
|
||||||
ultimate/~JYNgV
|
ultimate/~JYNgV
|
||||||
ultimatum/~NgS
|
ultimatum/~NgS
|
||||||
ultimo/~JN
|
ultimo/~JN
|
||||||
ultra/~JNSg
|
ultra/~JNSg(
|
||||||
ultraconservative/JNSg
|
ultraconservative/JNSg
|
||||||
ultrahigh/J
|
ultrahigh/J
|
||||||
ultraist/NSg
|
ultraist/NSg
|
||||||
|
|
@ -49929,6 +49950,7 @@ umlaut/NgSV
|
||||||
ump/NSgVGd
|
ump/NSgVGd
|
||||||
umpire/~NgSVGd
|
umpire/~NgSVGd
|
||||||
umpteen/H
|
umpteen/H
|
||||||
|
# un # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
unabridged/~JNgS
|
unabridged/~JNgS
|
||||||
unacceptability/Nmg
|
unacceptability/Nmg
|
||||||
unacceptable/~JN
|
unacceptable/~JN
|
||||||
|
|
@ -50026,7 +50048,7 @@ undecided/~JNSgV
|
||||||
undefine/VGdS
|
undefine/VGdS
|
||||||
undemonstrative/JY
|
undemonstrative/JY
|
||||||
undeniably/Ry
|
undeniably/Ry
|
||||||
under/~PJN
|
under/~PJN(
|
||||||
underachieve/VGd>SLZ
|
underachieve/VGd>SLZ
|
||||||
underachiever/Ng
|
underachiever/Ng
|
||||||
underact/VSdG
|
underact/VSdG
|
||||||
|
|
@ -50259,6 +50281,7 @@ unhealthy/~J^
|
||||||
unhistorical/J
|
unhistorical/J
|
||||||
unholy/~J^
|
unholy/~J^
|
||||||
unhurt/J
|
unhurt/J
|
||||||
|
# uni # prefixes that are not also words in their own right don't belong in the dictionary
|
||||||
unibody/NSg
|
unibody/NSg
|
||||||
unicameral/~J
|
unicameral/~J
|
||||||
unicellular/JN
|
unicellular/JN
|
||||||
|
|
@ -51037,7 +51060,7 @@ vicar/~NSg
|
||||||
vicarage/~NSg
|
vicarage/~NSg
|
||||||
vicarious/JYp
|
vicarious/JYp
|
||||||
vicariousness/Ng
|
vicariousness/Ng
|
||||||
vice/~NgSVJPe
|
vice/~NgSVJPe(
|
||||||
viced/JVtT
|
viced/JVtT
|
||||||
vicegerent/NSgJ
|
vicegerent/NSgJ
|
||||||
vicennial/JN
|
vicennial/JN
|
||||||
|
|
@ -53414,7 +53437,7 @@ pentest/VSdG
|
||||||
pentester/NSg # penetration tester
|
pentester/NSg # penetration tester
|
||||||
pentesting/NmgV6
|
pentesting/NmgV6
|
||||||
postfix/NgSVdG
|
postfix/NgSVdG
|
||||||
pre/~PNV # !! please check and comment !! dictionaries only list prefix pre-
|
pre/~PNV( # !! please check and comment !! dictionaries only list prefix pre-
|
||||||
preshared/J
|
preshared/J
|
||||||
quadtree/NgS # data structure
|
quadtree/NgS # data structure
|
||||||
quicksort/NgSVdG # algo
|
quicksort/NgSVdG # algo
|
||||||
|
|
|
||||||
|
|
@ -18,12 +18,20 @@ use crate::{Document, TokenKind, TokenStringExt};
|
||||||
/// having their own lexeme, but "Ivy" and "ivy" sharing the same lexeme.
|
/// having their own lexeme, but "Ivy" and "ivy" sharing the same lexeme.
|
||||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)]
|
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)]
|
||||||
pub struct DictWordMetadata {
|
pub struct DictWordMetadata {
|
||||||
|
/// The main parts of speech which have extra data.
|
||||||
pub noun: Option<NounData>,
|
pub noun: Option<NounData>,
|
||||||
pub pronoun: Option<PronounData>,
|
pub pronoun: Option<PronounData>,
|
||||||
pub verb: Option<VerbData>,
|
pub verb: Option<VerbData>,
|
||||||
pub adjective: Option<AdjectiveData>,
|
pub adjective: Option<AdjectiveData>,
|
||||||
pub adverb: Option<AdverbData>,
|
pub adverb: Option<AdverbData>,
|
||||||
pub conjunction: Option<ConjunctionData>,
|
pub conjunction: Option<ConjunctionData>,
|
||||||
|
pub determiner: Option<DeterminerData>,
|
||||||
|
pub affix: Option<AffixData>,
|
||||||
|
/// Parts of speech which don't have extra data.
|
||||||
|
/// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
|
||||||
|
#[serde(default = "default_false")]
|
||||||
|
pub preposition: bool,
|
||||||
|
/// Whether the word is an offensive word.
|
||||||
pub swear: Option<bool>,
|
pub swear: Option<bool>,
|
||||||
/// The dialects this word belongs to.
|
/// The dialects this word belongs to.
|
||||||
/// If no dialects are defined, it can be assumed that the word is
|
/// If no dialects are defined, it can be assumed that the word is
|
||||||
|
|
@ -33,11 +41,6 @@ pub struct DictWordMetadata {
|
||||||
/// Orthographic information: letter case, spaces, hyphens, etc.
|
/// Orthographic information: letter case, spaces, hyphens, etc.
|
||||||
#[serde(default = "OrthFlags::empty")]
|
#[serde(default = "OrthFlags::empty")]
|
||||||
pub orth_info: OrthFlags,
|
pub orth_info: OrthFlags,
|
||||||
/// Whether the word is a [determiner](https://en.wikipedia.org/wiki/English_determiners).
|
|
||||||
pub determiner: Option<DeterminerData>,
|
|
||||||
/// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
|
|
||||||
#[serde(default = "default_false")]
|
|
||||||
pub preposition: bool,
|
|
||||||
/// Whether the word is considered especially common.
|
/// Whether the word is considered especially common.
|
||||||
#[serde(default = "default_false")]
|
#[serde(default = "default_false")]
|
||||||
pub common: bool,
|
pub common: bool,
|
||||||
|
|
@ -189,11 +192,12 @@ impl DictWordMetadata {
|
||||||
adjective: merge!(self.adjective, other.adjective),
|
adjective: merge!(self.adjective, other.adjective),
|
||||||
adverb: merge!(self.adverb, other.adverb),
|
adverb: merge!(self.adverb, other.adverb),
|
||||||
conjunction: merge!(self.conjunction, other.conjunction),
|
conjunction: merge!(self.conjunction, other.conjunction),
|
||||||
|
determiner: merge!(self.determiner, other.determiner),
|
||||||
|
affix: merge!(self.affix, other.affix),
|
||||||
|
preposition: self.preposition || other.preposition,
|
||||||
dialects: self.dialects | other.dialects,
|
dialects: self.dialects | other.dialects,
|
||||||
orth_info: self.orth_info | other.orth_info,
|
orth_info: self.orth_info | other.orth_info,
|
||||||
swear: self.swear.or(other.swear),
|
swear: self.swear.or(other.swear),
|
||||||
determiner: merge!(self.determiner, other.determiner),
|
|
||||||
preposition: self.preposition || other.preposition,
|
|
||||||
common: self.common || other.common,
|
common: self.common || other.common,
|
||||||
derived_from: self.derived_from.or(other.derived_from),
|
derived_from: self.derived_from.or(other.derived_from),
|
||||||
pos_tag: self.pos_tag.or(other.pos_tag),
|
pos_tag: self.pos_tag.or(other.pos_tag),
|
||||||
|
|
@ -234,6 +238,7 @@ impl DictWordMetadata {
|
||||||
self.adverb = None;
|
self.adverb = None;
|
||||||
self.conjunction = None;
|
self.conjunction = None;
|
||||||
self.determiner = None;
|
self.determiner = None;
|
||||||
|
self.affix = None;
|
||||||
self.preposition = false;
|
self.preposition = false;
|
||||||
}
|
}
|
||||||
PROPN => {
|
PROPN => {
|
||||||
|
|
@ -259,6 +264,7 @@ impl DictWordMetadata {
|
||||||
self.adverb = None;
|
self.adverb = None;
|
||||||
self.conjunction = None;
|
self.conjunction = None;
|
||||||
self.determiner = None;
|
self.determiner = None;
|
||||||
|
self.affix = None;
|
||||||
self.preposition = false;
|
self.preposition = false;
|
||||||
}
|
}
|
||||||
PRON => {
|
PRON => {
|
||||||
|
|
@ -272,6 +278,7 @@ impl DictWordMetadata {
|
||||||
self.adverb = None;
|
self.adverb = None;
|
||||||
self.conjunction = None;
|
self.conjunction = None;
|
||||||
self.determiner = None;
|
self.determiner = None;
|
||||||
|
self.affix = None;
|
||||||
self.preposition = false;
|
self.preposition = false;
|
||||||
}
|
}
|
||||||
VERB => {
|
VERB => {
|
||||||
|
|
@ -293,6 +300,7 @@ impl DictWordMetadata {
|
||||||
self.adverb = None;
|
self.adverb = None;
|
||||||
self.conjunction = None;
|
self.conjunction = None;
|
||||||
self.determiner = None;
|
self.determiner = None;
|
||||||
|
self.affix = None;
|
||||||
self.preposition = false;
|
self.preposition = false;
|
||||||
}
|
}
|
||||||
AUX => {
|
AUX => {
|
||||||
|
|
@ -314,6 +322,7 @@ impl DictWordMetadata {
|
||||||
self.adverb = None;
|
self.adverb = None;
|
||||||
self.conjunction = None;
|
self.conjunction = None;
|
||||||
self.determiner = None;
|
self.determiner = None;
|
||||||
|
self.affix = None;
|
||||||
self.preposition = false;
|
self.preposition = false;
|
||||||
}
|
}
|
||||||
ADJ => {
|
ADJ => {
|
||||||
|
|
@ -327,6 +336,7 @@ impl DictWordMetadata {
|
||||||
self.adverb = None;
|
self.adverb = None;
|
||||||
self.conjunction = None;
|
self.conjunction = None;
|
||||||
self.determiner = None;
|
self.determiner = None;
|
||||||
|
self.affix = None;
|
||||||
self.preposition = false;
|
self.preposition = false;
|
||||||
}
|
}
|
||||||
ADV => {
|
ADV => {
|
||||||
|
|
@ -340,6 +350,7 @@ impl DictWordMetadata {
|
||||||
self.adjective = None;
|
self.adjective = None;
|
||||||
self.conjunction = None;
|
self.conjunction = None;
|
||||||
self.determiner = None;
|
self.determiner = None;
|
||||||
|
self.affix = None;
|
||||||
self.preposition = false;
|
self.preposition = false;
|
||||||
}
|
}
|
||||||
ADP => {
|
ADP => {
|
||||||
|
|
@ -350,6 +361,7 @@ impl DictWordMetadata {
|
||||||
self.adverb = None;
|
self.adverb = None;
|
||||||
self.conjunction = None;
|
self.conjunction = None;
|
||||||
self.determiner = None;
|
self.determiner = None;
|
||||||
|
self.affix = None;
|
||||||
self.preposition = true;
|
self.preposition = true;
|
||||||
}
|
}
|
||||||
DET => {
|
DET => {
|
||||||
|
|
@ -359,6 +371,7 @@ impl DictWordMetadata {
|
||||||
self.adjective = None;
|
self.adjective = None;
|
||||||
self.adverb = None;
|
self.adverb = None;
|
||||||
self.conjunction = None;
|
self.conjunction = None;
|
||||||
|
self.affix = None;
|
||||||
self.preposition = false;
|
self.preposition = false;
|
||||||
self.determiner = Some(DeterminerData::default());
|
self.determiner = Some(DeterminerData::default());
|
||||||
}
|
}
|
||||||
|
|
@ -373,6 +386,7 @@ impl DictWordMetadata {
|
||||||
self.adjective = None;
|
self.adjective = None;
|
||||||
self.adverb = None;
|
self.adverb = None;
|
||||||
self.determiner = None;
|
self.determiner = None;
|
||||||
|
self.affix = None;
|
||||||
self.preposition = false;
|
self.preposition = false;
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
|
|
@ -958,6 +972,22 @@ impl ConjunctionData {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
|
||||||
|
pub struct AffixData {
|
||||||
|
pub is_prefix: Option<bool>,
|
||||||
|
pub is_suffix: Option<bool>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AffixData {
|
||||||
|
/// Produce a copy of `self` with the known properties of `other` set.
|
||||||
|
pub fn or(&self, _other: &Self) -> Self {
|
||||||
|
Self {
|
||||||
|
is_prefix: self.is_prefix.or(_other.is_prefix),
|
||||||
|
is_suffix: self.is_suffix.or(_other.is_suffix),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A regional dialect.
|
/// A regional dialect.
|
||||||
///
|
///
|
||||||
/// Note: these have bit-shifted values so that they can ergonomically integrate with
|
/// Note: these have bit-shifted values so that they can ergonomically integrate with
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue